Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: implement non-constant rotates using ROR on arm64
author erifan01 <eric.fang@arm.com>
Sat, 30 Jun 2018 06:48:51 +0000 (06:48 +0000)
committer Cherry Zhang <cherryyz@google.com>
Fri, 7 Sep 2018 14:52:02 +0000 (14:52 +0000)
Add rules that match Go code such as:
y &= 63
x << y | x >> (64-y)
or
y &= 63
x >> y | x << (64-y)
and compile it to a single ROR instruction. This makes math/bits.RotateLeft faster on arm64.

Extends CL 132435 to arm64.

Benchmarks of math/bits.RotateLeftxxN:
name            old time/op       new time/op       delta
RotateLeft-8    3.548750ns +- 1%  2.003750ns +- 0%  -43.54%  (p=0.000 n=8+8)
RotateLeft8-8   3.925000ns +- 0%  3.925000ns +- 0%     ~     (p=1.000 n=8+8)
RotateLeft16-8  3.925000ns +- 0%  3.927500ns +- 0%     ~     (p=0.608 n=8+8)
RotateLeft32-8  3.925000ns +- 0%  2.002500ns +- 0%  -48.98%  (p=0.000 n=8+8)
RotateLeft64-8  3.536250ns +- 0%  2.003750ns +- 0%  -43.34%  (p=0.000 n=8+8)

Change-Id: I77622cd7f39b917427e060647321f5513973232c
Reviewed-on: https://go-review.googlesource.com/122542
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/mathbits.go

index db7064cff0e1bb2cb3947a012969d3364da7040c..192654158276d44680ce78391c173d1e489655fc 100644 (file)
@@ -195,7 +195,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpARM64FNMULS,
                ssa.OpARM64FNMULD,
                ssa.OpARM64FDIVS,
-               ssa.OpARM64FDIVD:
+               ssa.OpARM64FDIVD,
+               ssa.OpARM64ROR,
+               ssa.OpARM64RORW:
                r := v.Reg()
                r1 := v.Args[0].Reg()
                r2 := v.Args[1].Reg()
index 00ff7d4bd56bcd912e9179baec81687e8ec4d4a5..bb076f870849df7a203126adb9d7eb31ff82d55e 100644 (file)
@@ -3361,12 +3361,12 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpRotateLeft32, types.Types[TUINT32], args[0], args[1])
                },
-               sys.AMD64, sys.S390X)
+               sys.AMD64, sys.ARM64, sys.S390X)
        addF("math/bits", "RotateLeft64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpRotateLeft64, types.Types[TUINT64], args[0], args[1])
                },
-               sys.AMD64, sys.S390X)
+               sys.AMD64, sys.ARM64, sys.S390X)
        alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...)
 
        makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
index 6c8f3860d186728c49c4698517975127f8cc6959..4c9d9a6f7a38f9e0c359c8fb2e3a04715b09cc18 100644 (file)
 (Round x) -> (FRINTAD x)
 (Trunc x) -> (FRINTZD x)
 
+// lowering rotates
+(RotateLeft32 x y) -> (RORW x (NEG <y.Type> y))
+(RotateLeft64 x y) -> (ROR x (NEG <y.Type> y))
+
 (Ctz64NonZero x) -> (Ctz64 x)
 (Ctz32NonZero x) -> (Ctz32 x)
 
 // shifts
 // hardware instruction uses only the low 6 bits of the shift
 // we compare to 64 to ensure Go semantics for large shifts
+// Rules about rotates with non-const shift are based on the following rules,
+// if the following rules change, please also modify the rules based on them.
 (Lsh64x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
 (Lsh64x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
 (Lsh64x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
 (ORNshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [-1])
 (ORNshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [-1])
 
-// Generate rotates
+// Generate rotates with const shift
 (ADDshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
 ( ORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
 (XORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
 ( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
 (XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
 
+(RORconst [c] (RORconst [d] x)) -> (RORconst [(c+d)&63] x)
+(RORWconst [c] (RORWconst [d] x)) -> (RORWconst [(c+d)&31] x)
+
+// Generate rotates with non-const shift.
+// These rules match the Go source code like
+//     y &= 63
+//     x << y | x >> (64-y)
+// "|" can also be "^" or "+".
+// As arm64 does not have a ROL instruction, so ROL(x, y) is replaced by ROR(x, -y).
+((ADD|OR|XOR) (SLL x (ANDconst <t> [63] y))
+       (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
+               (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
+       -> (ROR x (NEG <t> y))
+((ADD|OR|XOR) (SRL <typ.UInt64> x (ANDconst <t> [63] y))
+       (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
+               (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
+       -> (ROR x y)
+
+// These rules match the Go source code like
+//     y &= 31
+//     x << y | x >> (32-y)
+// "|" can also be "^" or "+".
+// As arm64 does not have a ROLW instruction, so ROLW(x, y) is replaced by RORW(x, -y).
+((ADD|OR|XOR) (SLL x (ANDconst <t> [31] y))
+       (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
+               (CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
+       -> (RORW x (NEG <t> y))
+((ADD|OR|XOR) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y))
+       (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
+               (CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
+       -> (RORW x y)
+
 // Extract from reg pair
 (ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
 ( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
index eb0ad530a1a4b3fa47dde7b98710a2fc746f63b3..43230fbf70faf506e1a91bd781ef8243ed28d576 100644 (file)
@@ -248,6 +248,8 @@ func init() {
                {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"},     // arg0 >> auxInt, unsigned
                {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"},                        // arg0 >> arg1, signed, shift amount is mod 64
                {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"},     // arg0 >> auxInt, signed
+               {name: "ROR", argLength: 2, reg: gp21, asm: "ROR"},                        // arg0 right rotate by (arg1 mod 64) bits
+               {name: "RORW", argLength: 2, reg: gp21, asm: "RORW"},                      // arg0 right rotate by (arg1 mod 32) bits
                {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"},     // arg0 right rotate by auxInt bits
                {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"},   // uint32(arg0) right rotate by auxInt bits
                {name: "EXTRconst", argLength: 2, reg: gp21, asm: "EXTR", aux: "Int64"},   // extract 64 bits from arg0:arg1 starting at lsb auxInt
index 5bf7021432699773a101bdd7a72b1580e0367e4c..30c57874f6a1736b7c3382cf7ddf34b9643675f3 100644 (file)
@@ -1139,6 +1139,8 @@ const (
        OpARM64SRLconst
        OpARM64SRA
        OpARM64SRAconst
+       OpARM64ROR
+       OpARM64RORW
        OpARM64RORconst
        OpARM64RORWconst
        OpARM64EXTRconst
@@ -15104,6 +15106,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "ROR",
+               argLen: 2,
+               asm:    arm64.AROR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "RORW",
+               argLen: 2,
+               asm:    arm64.ARORW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:    "RORconst",
                auxType: auxInt64,
index 219bc3676d8eea94f60520ef534b3e8047492af0..1dcbd9348099ac60af09d593734ab9394e511881 100644 (file)
@@ -16,7 +16,7 @@ var _ = types.TypeMem // in case not otherwise used
 func rewriteValueARM64(v *Value) bool {
        switch v.Op {
        case OpARM64ADD:
-               return rewriteValueARM64_OpARM64ADD_0(v) || rewriteValueARM64_OpARM64ADD_10(v)
+               return rewriteValueARM64_OpARM64ADD_0(v) || rewriteValueARM64_OpARM64ADD_10(v) || rewriteValueARM64_OpARM64ADD_20(v)
        case OpARM64ADDconst:
                return rewriteValueARM64_OpARM64ADDconst_0(v)
        case OpARM64ADDshiftLL:
@@ -256,7 +256,7 @@ func rewriteValueARM64(v *Value) bool {
        case OpARM64NotEqual:
                return rewriteValueARM64_OpARM64NotEqual_0(v)
        case OpARM64OR:
-               return rewriteValueARM64_OpARM64OR_0(v) || rewriteValueARM64_OpARM64OR_10(v) || rewriteValueARM64_OpARM64OR_20(v) || rewriteValueARM64_OpARM64OR_30(v)
+               return rewriteValueARM64_OpARM64OR_0(v) || rewriteValueARM64_OpARM64OR_10(v) || rewriteValueARM64_OpARM64OR_20(v) || rewriteValueARM64_OpARM64OR_30(v) || rewriteValueARM64_OpARM64OR_40(v)
        case OpARM64ORN:
                return rewriteValueARM64_OpARM64ORN_0(v)
        case OpARM64ORNshiftLL:
@@ -273,6 +273,10 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64ORshiftRA_0(v)
        case OpARM64ORshiftRL:
                return rewriteValueARM64_OpARM64ORshiftRL_0(v)
+       case OpARM64RORWconst:
+               return rewriteValueARM64_OpARM64RORWconst_0(v)
+       case OpARM64RORconst:
+               return rewriteValueARM64_OpARM64RORconst_0(v)
        case OpARM64SLL:
                return rewriteValueARM64_OpARM64SLL_0(v)
        case OpARM64SLLconst:
@@ -733,6 +737,10 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpPopCount32_0(v)
        case OpPopCount64:
                return rewriteValueARM64_OpPopCount64_0(v)
+       case OpRotateLeft32:
+               return rewriteValueARM64_OpRotateLeft32_0(v)
+       case OpRotateLeft64:
+               return rewriteValueARM64_OpRotateLeft64_0(v)
        case OpRound:
                return rewriteValueARM64_OpRound_0(v)
        case OpRound32F:
@@ -1090,6 +1098,10 @@ func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64ADD_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
        // match: (ADD x (NEG y))
        // cond:
        // result: (SUB x y)
@@ -1248,1935 +1260,1013 @@ func rewriteValueARM64_OpARM64ADD_10(v *Value) bool {
                v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64ADDconst_0(v *Value) bool {
-       // match: (ADDconst [off1] (MOVDaddr [off2] {sym} ptr))
-       // cond:
-       // result: (MOVDaddr [off1+off2] {sym} ptr)
+       // match: (ADD (SLL x (ANDconst <t> [63] y)) (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x (NEG <t> y))
        for {
-               off1 := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64SLL {
                        break
                }
-               off2 := v_0.AuxInt
-               sym := v_0.Aux
-               ptr := v_0.Args[0]
-               v.reset(OpARM64MOVDaddr)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               return true
-       }
-       // match: (ADDconst [0] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c+d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 63 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c + d
-               return true
-       }
-       // match: (ADDconst [c] (ADDconst [d] x))
-       // cond:
-       // result: (ADDconst [c+d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = c + d
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDconst [c] (SUBconst [d] x))
-       // cond:
-       // result: (ADDconst [c-d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SUBconst {
+               if v_1.Type != typ.UInt64 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = c - d
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ADDshiftLL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ADDshiftLL (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ADDconst [c] (SLLconst <x.Type> x [d]))
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SRL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ADDshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ADDconst x [int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_0.Type != typ.UInt64 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDshiftLL [c] (SRLconst x [64-c]) x)
-       // cond:
-       // result: (RORconst [64-c] x)
-       for {
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               if v_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64RORconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDshiftLL <t> [c] (UBFX [bfc] x) x)
-       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
-       // result: (RORWconst [32-c] x)
-       for {
-               t := v.Type
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               if v_1_0_1_0.AuxInt != 64 {
                        break
                }
-               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64RORWconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDshiftLL [c] (SRLconst x [64-c]) x2)
-       // cond:
-       // result: (EXTRconst [64-c] x2 x)
-       for {
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               if v_1_0_1_1.AuxInt != 63 {
                        break
                }
-               x := v_0.Args[0]
-               x2 := v.Args[1]
-               v.reset(OpARM64EXTRconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x2)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDshiftLL <t> [c] (UBFX [bfc] x) x2)
-       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
-       // result: (EXTRWconst [32-c] x2 x)
-       for {
-               t := v.Type
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               x2 := v.Args[1]
-               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               v.reset(OpARM64EXTRWconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x2)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ADDshiftRA_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ADDshiftRA (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ADDconst [c] (SRAconst <x.Type> x [d]))
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ADDshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ADDconst x [c>>uint64(d)])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = c >> uint64(d)
+               if v_1_1_0.Type != t {
+                       break
+               }
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1_1_0_0.AuxInt != 64 {
+                       break
+               }
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1_0_1.Type != t {
+                       break
+               }
+               if v_1_1_0_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64ADDshiftRL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ADDshiftRL (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ADDconst [c] (SRLconst <x.Type> x [d]))
+       // match: (ADD (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))) (SLL x (ANDconst <t> [63] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x (NEG <t> y))
        for {
-               d := v.AuxInt
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ADDshiftRL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ADDconst x [int64(uint64(c)>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDshiftRL [c] (SLLconst x [64-c]) x)
-       // cond:
-       // result: (RORconst [ c] x)
-       for {
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SRL {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               if v_0_0.Type != typ.UInt64 {
                        break
                }
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64RORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
-       // cond: c < 32 && t.Size() == 4
-       // result: (RORWconst [c] x)
-       for {
-               t := v.Type
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 32-c {
+               if v_0_0_1_0.AuxInt != 64 {
                        break
                }
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               if x != v_1.Args[0] {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               if !(c < 32 && t.Size() == 4) {
+               if v_0_0_1_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64RORWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64AND_0(v *Value) bool {
-       // match: (AND x (MOVDconst [c]))
-       // cond:
-       // result: (ANDconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (AND (MOVDconst [c]) x)
-       // cond:
-       // result: (ANDconst [c] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (AND x x)
-       // cond:
-       // result: x
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (AND x (MVN y))
-       // cond:
-       // result: (BIC x y)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MVN {
+               if v_0_1_0.Type != t {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpARM64BIC)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (AND (MVN y) x)
-       // cond:
-       // result: (BIC x y)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MVN {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64BIC)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (AND x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ANDshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               if v_0_1_0_0.AuxInt != 64 {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64ANDshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (AND x1:(SLLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ANDshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SLLconst {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if v_0_1_0_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64ANDshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (AND x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ANDshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLL {
                        break
                }
-               v.reset(OpARM64ANDshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (AND x1:(SRLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ANDshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRLconst {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64ANDshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (AND x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ANDshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               if v_1_1.Type != t {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_1_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64ANDshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64AND_10(v *Value) bool {
-       // match: (AND x1:(SRAconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ANDshiftRA x0 y [c])
+func rewriteValueARM64_OpARM64ADD_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (ADD (SRL <typ.UInt64> x (ANDconst <t> [63] y)) (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x y)
        for {
                _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRAconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRL {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64ANDshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ANDconst_0(v *Value) bool {
-       // match: (ANDconst [0] _)
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               if v.AuxInt != 0 {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (ANDconst [-1] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != -1 {
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 63 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c&d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c & d
-               return true
-       }
-       // match: (ANDconst [c] (ANDconst [d] x))
-       // cond:
-       // result: (ANDconst [c&d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               if v_1.Type != typ.UInt64 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & d
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDconst [c] (MOVWUreg x))
-       // cond:
-       // result: (ANDconst [c&(1<<32-1)] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVWUreg {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SLL {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<32 - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDconst [c] (MOVHUreg x))
-       // cond:
-       // result: (ANDconst [c&(1<<16-1)] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVHUreg {
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<16 - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDconst [c] (MOVBUreg x))
-       // cond:
-       // result: (ANDconst [c&(1<<8-1)] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVBUreg {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<8 - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDconst [ac] (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, ac, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
-       for {
-               ac := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_1_0_1.Type != t {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, ac, sc)) {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDconst [ac] (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, ac, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
-       for {
-               ac := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if v_1_0_1_0.AuxInt != 64 {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, ac, 0)) {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, 0))
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ANDshiftLL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ANDshiftLL (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ANDconst [c] (SLLconst <x.Type> x [d]))
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ANDshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ANDconst x [int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_0_1_1.AuxInt != 63 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDshiftLL x y:(SLLconst x [c]) [d])
-       // cond: c==d
-       // result: y
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpARM64SLLconst {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               c := y.AuxInt
-               if x != y.Args[0] {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               if !(c == d) {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ANDshiftRA_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ANDshiftRA (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ANDconst [c] (SRAconst <x.Type> x [d]))
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ANDshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ANDconst x [c>>uint64(d)])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1_0.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c >> uint64(d)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDshiftRA x y:(SRAconst x [c]) [d])
-       // cond: c==d
-       // result: y
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpARM64SRAconst {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := y.AuxInt
-               if x != y.Args[0] {
+               if v_1_1_0_0.AuxInt != 64 {
                        break
                }
-               if !(c == d) {
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
+               if v_1_1_0_1.Type != t {
+                       break
+               }
+               if v_1_1_0_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
+               v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64ANDshiftRL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ANDshiftRL (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ANDconst [c] (SRLconst <x.Type> x [d]))
+       // match: (ADD (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))) (SRL <typ.UInt64> x (ANDconst <t> [63] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x y)
        for {
-               d := v.AuxInt
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ANDshiftRL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ANDconst x [int64(uint64(c)>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ANDshiftRL x y:(SRLconst x [c]) [d])
-       // cond: c==d
-       // result: y
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpARM64SRLconst {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SLL {
                        break
                }
-               c := y.AuxInt
-               if x != y.Args[0] {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               if !(c == d) {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64BIC_0(v *Value) bool {
-       // match: (BIC x (MOVDconst [c]))
-       // cond:
-       // result: (ANDconst [^c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_0_1_0.AuxInt != 64 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = ^c
-               v.AddArg(x)
-               return true
-       }
-       // match: (BIC x x)
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (BIC x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (BICshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_0_1_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64BICshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (BIC x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (BICshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64BICshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (BIC x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (BICshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64BICshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64BICshiftLL_0(v *Value) bool {
-       // match: (BICshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ANDconst x [^int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = ^int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BICshiftLL x (SLLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [0])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_0_1_0_0.AuxInt != 64 {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               if !(c == d) {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64BICshiftRA_0(v *Value) bool {
-       // match: (BICshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ANDconst x [^(c>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_1_0_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = ^(c >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BICshiftRA x (SRAconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [0])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRAconst {
+               if v_1.Op != OpARM64SRL {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if v_1.Type != typ.UInt64 {
                        break
                }
-               if !(c == d) {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64BICshiftRL_0(v *Value) bool {
-       // match: (BICshiftRL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ANDconst x [^int64(uint64(c)>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = ^int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (BICshiftRL x (SRLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [0])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1_1.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if v_1_1.AuxInt != 63 {
                        break
                }
-               if !(c == d) {
+               if y != v_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMN_0(v *Value) bool {
-       // match: (CMN x (MOVDconst [c]))
-       // cond:
-       // result: (CMNconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if !(cc.(Op) == OpARM64LessThanU) {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64CMNconst)
-               v.AuxInt = c
+               v.reset(OpARM64ROR)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (CMN (MOVDconst [c]) x)
-       // cond:
-       // result: (CMNconst [c] x)
+       // match: (ADD (SLL x (ANDconst <t> [31] y)) (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x (NEG <t> y))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SLL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64CMNconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMNW_0(v *Value) bool {
-       // match: (CMNW x (MOVDconst [c]))
-       // cond:
-       // result: (CMNWconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 31 {
+                       break
+               }
+               y := v_0_1.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64CMNWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (CMNW (MOVDconst [c]) x)
-       // cond:
-       // result: (CMNWconst [c] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1.Type != typ.UInt32 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64CMNWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMNWconst_0(v *Value) bool {
-       // match: (CMNWconst (MOVDconst [x]) [y])
-       // cond: int32(x)==int32(-y)
-       // result: (FlagEQ)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SRL {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) == int32(-y)) {
+               if v_1_0.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64FlagEQ)
-               return true
-       }
-       // match: (CMNWconst (MOVDconst [x]) [y])
-       // cond: int32(x)<int32(-y) && uint32(x)<uint32(-y)
-       // result: (FlagLT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(-y) && uint32(x) < uint32(-y)) {
+               if x != v_1_0_0.Args[0] {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       // match: (CMNWconst (MOVDconst [x]) [y])
-       // cond: int32(x)<int32(-y) && uint32(x)>uint32(-y)
-       // result: (FlagLT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(-y) && uint32(x) > uint32(-y)) {
+               if v_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64FlagLT_UGT)
-               return true
-       }
-       // match: (CMNWconst (MOVDconst [x]) [y])
-       // cond: int32(x)>int32(-y) && uint32(x)<uint32(-y)
-       // result: (FlagGT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(-y) && uint32(x) < uint32(-y)) {
+               if v_1_0_1_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64FlagGT_ULT)
-               return true
-       }
-       // match: (CMNWconst (MOVDconst [x]) [y])
-       // cond: int32(x)>int32(-y) && uint32(x)>uint32(-y)
-       // result: (FlagGT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(-y) && uint32(x) > uint32(-y)) {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               v.reset(OpARM64FlagGT_UGT)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMNconst_0(v *Value) bool {
-       // match: (CMNconst (MOVDconst [x]) [y])
-       // cond: int64(x)==int64(-y)
-       // result: (FlagEQ)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1_0_1_1.AuxInt != 31 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x) == int64(-y)) {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64FlagEQ)
-               return true
-       }
-       // match: (CMNconst (MOVDconst [x]) [y])
-       // cond: int64(x)<int64(-y) && uint64(x)<uint64(-y)
-       // result: (FlagLT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x) < int64(-y) && uint64(x) < uint64(-y)) {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       // match: (CMNconst (MOVDconst [x]) [y])
-       // cond: int64(x)<int64(-y) && uint64(x)>uint64(-y)
-       // result: (FlagLT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x) < int64(-y) && uint64(x) > uint64(-y)) {
+               if v_1_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64FlagLT_UGT)
-               return true
-       }
-       // match: (CMNconst (MOVDconst [x]) [y])
-       // cond: int64(x)>int64(-y) && uint64(x)<uint64(-y)
-       // result: (FlagGT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x) > int64(-y) && uint64(x) < uint64(-y)) {
+               if v_1_1_0_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64FlagGT_ULT)
-               return true
-       }
-       // match: (CMNconst (MOVDconst [x]) [y])
-       // cond: int64(x)>int64(-y) && uint64(x)>uint64(-y)
-       // result: (FlagGT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x) > int64(-y) && uint64(x) > uint64(-y)) {
+               if v_1_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64FlagGT_UGT)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMP_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMP x (MOVDconst [c]))
-       // cond:
-       // result: (CMPconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1_0_1.AuxInt != 31 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64CMPconst)
-               v.AuxInt = c
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (CMP (MOVDconst [c]) x)
-       // cond:
-       // result: (InvertFlags (CMPconst [c] x))
+       // match: (ADD (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))) (SLL x (ANDconst <t> [31] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x (NEG <t> y))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (CMP x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (CMPshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SRL {
                        break
                }
-               v.reset(OpARM64CMPshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (CMP x0:(SLLconst [c] y) x1)
-       // cond: clobberIfDead(x0)
-       // result: (InvertFlags (CMPshiftLL x1 y [c]))
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               if x0.Op != OpARM64SLLconst {
+               if v_0_0.Type != typ.UInt32 {
                        break
                }
-               c := x0.AuxInt
-               y := x0.Args[0]
-               x1 := v.Args[1]
-               if !(clobberIfDead(x0)) {
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPshiftLL, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x1)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (CMP x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (CMPshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               x := v_0_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64CMPshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (CMP x0:(SRLconst [c] y) x1)
-       // cond: clobberIfDead(x0)
-       // result: (InvertFlags (CMPshiftRL x1 y [c]))
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               if x0.Op != OpARM64SRLconst {
+               if v_0_0_1_0.AuxInt != 32 {
                        break
                }
-               c := x0.AuxInt
-               y := x0.Args[0]
-               x1 := v.Args[1]
-               if !(clobberIfDead(x0)) {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPshiftRL, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x1)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (CMP x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (CMPshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_0_1_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64CMPshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (CMP x0:(SRAconst [c] y) x1)
-       // cond: clobberIfDead(x0)
-       // result: (InvertFlags (CMPshiftRA x1 y [c]))
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               if x0.Op != OpARM64SRAconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               c := x0.AuxInt
-               y := x0.Args[0]
-               x1 := v.Args[1]
-               if !(clobberIfDead(x0)) {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPshiftRA, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x1)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMPW_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPW x (MOVDconst [c]))
-       // cond:
-       // result: (CMPWconst [int64(int32(c))] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64CMPWconst)
-               v.AuxInt = int64(int32(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (CMPW (MOVDconst [c]) x)
-       // cond:
-       // result: (InvertFlags (CMPWconst [int64(int32(c))] x))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0.Type != t {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPWconst, types.TypeFlags)
-               v0.AuxInt = int64(int32(c))
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMPWconst_0(v *Value) bool {
-       // match: (CMPWconst (MOVDconst [x]) [y])
-       // cond: int32(x)==int32(y)
-       // result: (FlagEQ)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) == int32(y)) {
+               if v_0_1_0_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64FlagEQ)
-               return true
-       }
-       // match: (CMPWconst (MOVDconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
-       // result: (FlagLT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPWconst (MOVDconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
-       // result: (FlagLT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0_1.AuxInt != 31 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64FlagLT_UGT)
-               return true
-       }
-       // match: (CMPWconst (MOVDconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
-       // result: (FlagGT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLL {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64FlagGT_ULT)
-               return true
-       }
-       // match: (CMPWconst (MOVDconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
-       // result: (FlagGT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
+               if v_1_1.Type != t {
                        break
                }
-               v.reset(OpARM64FlagGT_UGT)
-               return true
-       }
-       // match: (CMPWconst (MOVBUreg _) [c])
-       // cond: 0xff < int32(c)
-       // result: (FlagLT_ULT)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVBUreg {
+               if v_1_1.AuxInt != 31 {
                        break
                }
-               if !(0xff < int32(c)) {
+               if y != v_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (CMPWconst (MOVHUreg _) [c])
-       // cond: 0xffff < int32(c)
-       // result: (FlagLT_ULT)
+       // match: (ADD (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y)) (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x y)
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVHUreg {
+               if v_0.Op != OpARM64SRL {
                        break
                }
-               if !(0xffff < int32(c)) {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64CMPconst_0(v *Value) bool {
-       // match: (CMPconst (MOVDconst [x]) [y])
-       // cond: x==y
-       // result: (FlagEQ)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               x := v_0.AuxInt
-               if !(x == y) {
+               x := v_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64FlagEQ)
-               return true
-       }
-       // match: (CMPconst (MOVDconst [x]) [y])
-       // cond: x<y && uint64(x)<uint64(y)
-       // result: (FlagLT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 31 {
                        break
                }
-               x := v_0.AuxInt
-               if !(x < y && uint64(x) < uint64(y)) {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPconst (MOVDconst [x]) [y])
-       // cond: x<y && uint64(x)>uint64(y)
-       // result: (FlagLT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1.Type != typ.UInt32 {
                        break
                }
-               x := v_0.AuxInt
-               if !(x < y && uint64(x) > uint64(y)) {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SLL {
                        break
                }
-               v.reset(OpARM64FlagLT_UGT)
-               return true
-       }
-       // match: (CMPconst (MOVDconst [x]) [y])
-       // cond: x>y && uint64(x)<uint64(y)
-       // result: (FlagGT_ULT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
                        break
                }
-               x := v_0.AuxInt
-               if !(x > y && uint64(x) < uint64(y)) {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64FlagGT_ULT)
-               return true
-       }
-       // match: (CMPconst (MOVDconst [x]) [y])
-       // cond: x>y && uint64(x)>uint64(y)
-       // result: (FlagGT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1_0_1.Type != t {
                        break
                }
-               x := v_0.AuxInt
-               if !(x > y && uint64(x) > uint64(y)) {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64FlagGT_UGT)
-               return true
-       }
-       // match: (CMPconst (MOVBUreg _) [c])
-       // cond: 0xff < c
-       // result: (FlagLT_ULT)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVBUreg {
+               if v_1_0_1_0.AuxInt != 32 {
                        break
                }
-               if !(0xff < c) {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPconst (MOVHUreg _) [c])
-       // cond: 0xffff < c
-       // result: (FlagLT_ULT)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVHUreg {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               if !(0xffff < c) {
+               if v_1_0_1_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPconst (MOVWUreg _) [c])
-       // cond: 0xffffffff < c
-       // result: (FlagLT_ULT)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVWUreg {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               if !(0xffffffff < c) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
-               return true
-       }
-       // match: (CMPconst (ANDconst _ [m]) [n])
-       // cond: 0 <= m && m < n
-       // result: (FlagLT_ULT)
-       for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               m := v_0.AuxInt
-               if !(0 <= m && m < n) {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
+               if v_1_1_0.Type != t {
+                       break
+               }
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1_0_1.Type != t {
+                       break
+               }
+               if v_1_1_0_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (CMPconst (SRLconst _ [c]) [n])
-       // cond: 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n)
-       // result: (FlagLT_ULT)
+       // match: (ADD (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x y)
        for {
-               n := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               if !(0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n)) {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64FlagLT_ULT)
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SLL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
+                       break
+               }
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0_0_1_0.AuxInt != 32 {
+                       break
+               }
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_0_0_1_1.Type != t {
+                       break
+               }
+               if v_0_0_1_1.AuxInt != 31 {
+                       break
+               }
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
+                       break
+               }
+               if v_0_1.AuxInt != 64 {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
+                       break
+               }
+               if v_0_1_0.Type != t {
+                       break
+               }
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_0_1_0_1.Type != t {
+                       break
+               }
+               if v_0_1_0_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_0_1_0_1.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRL {
+                       break
+               }
+               if v_1.Type != typ.UInt32 {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVWUreg {
+                       break
+               }
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               if v_1_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64CMPshiftLL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPshiftLL (MOVDconst [c]) x [d])
+func rewriteValueARM64_OpARM64ADDconst_0(v *Value) bool {
+       // match: (ADDconst [off1] (MOVDaddr [off2] {sym} ptr))
        // cond:
-       // result: (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
+       // result: (MOVDaddr [off1+off2] {sym} ptr)
        for {
-               d := v.AuxInt
-               _ = v.Args[1]
+               off1 := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v0.AuxInt = c
-               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v1.AuxInt = d
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               off2 := v_0.AuxInt
+               sym := v_0.Aux
+               ptr := v_0.Args[0]
+               v.reset(OpARM64MOVDaddr)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
                return true
        }
-       // match: (CMPshiftLL x (MOVDconst [c]) [d])
+       // match: (ADDconst [0] x)
        // cond:
-       // result: (CMPconst x [int64(uint64(c)<<uint64(d))])
+       // result: x
        for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v.AuxInt != 0 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64CMPconst)
-               v.AuxInt = int64(uint64(c) << uint64(d))
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64CMPshiftRA_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPshiftRA (MOVDconst [c]) x [d])
+       // match: (ADDconst [c] (MOVDconst [d]))
        // cond:
-       // result: (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
+       // result: (MOVDconst [c+d])
        for {
-               d := v.AuxInt
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v0.AuxInt = c
-               v1 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
-               v1.AuxInt = d
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c + d
                return true
        }
-       // match: (CMPshiftRA x (MOVDconst [c]) [d])
+       // match: (ADDconst [c] (ADDconst [d] x))
        // cond:
-       // result: (CMPconst x [c>>uint64(d)])
+       // result: (ADDconst [c+d] x)
        for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64CMPconst)
-               v.AuxInt = c >> uint64(d)
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c + d
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADDconst [c] (SUBconst [d] x))
+       // cond:
+       // result: (ADDconst [c-d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SUBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c - d
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64CMPshiftRL_0(v *Value) bool {
+func rewriteValueARM64_OpARM64ADDshiftLL_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (CMPshiftRL (MOVDconst [c]) x [d])
+       // match: (ADDshiftLL (MOVDconst [c]) x [d])
        // cond:
-       // result: (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
+       // result: (ADDconst [c] (SLLconst <x.Type> x [d]))
        for {
                d := v.AuxInt
                _ = v.Args[1]
@@ -3186,19 +2276,17 @@ func rewriteValueARM64_OpARM64CMPshiftRL_0(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
-               v0.AuxInt = c
-               v1 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
-               v1.AuxInt = d
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (CMPshiftRL x (MOVDconst [c]) [d])
+       // match: (ADDshiftLL x (MOVDconst [c]) [d])
        // cond:
-       // result: (CMPconst x [int64(uint64(c)>>uint64(d))])
+       // result: (ADDconst x [int64(uint64(c)<<uint64(d))])
        for {
                d := v.AuxInt
                _ = v.Args[1]
@@ -3208,287 +2296,259 @@ func rewriteValueARM64_OpARM64CMPshiftRL_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpARM64CMPconst)
-               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64CSEL_0(v *Value) bool {
-       // match: (CSEL {cc} x (MOVDconst [0]) flag)
+       // match: (ADDshiftLL [c] (SRLconst x [64-c]) x)
        // cond:
-       // result: (CSEL0 {cc} x flag)
+       // result: (RORconst [64-c] x)
        for {
-               cc := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               flag := v.Args[2]
-               v.reset(OpARM64CSEL0)
-               v.Aux = cc
+               x := v_0.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpARM64RORconst)
+               v.AuxInt = 64 - c
                v.AddArg(x)
-               v.AddArg(flag)
                return true
        }
-       // match: (CSEL {cc} (MOVDconst [0]) y flag)
-       // cond:
-       // result: (CSEL0 {arm64Negate(cc.(Op))} y flag)
+       // match: (ADDshiftLL <t> [c] (UBFX [bfc] x) x)
+       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
+       // result: (RORWconst [32-c] x)
        for {
-               cc := v.Aux
-               _ = v.Args[2]
+               t := v.Type
+               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               if v_0.AuxInt != 0 {
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               y := v.Args[1]
-               flag := v.Args[2]
-               v.reset(OpARM64CSEL0)
-               v.Aux = arm64Negate(cc.(Op))
-               v.AddArg(y)
-               v.AddArg(flag)
-               return true
-       }
-       // match: (CSEL {cc} x y (InvertFlags cmp))
-       // cond:
-       // result: (CSEL {arm64Invert(cc.(Op))} x y cmp)
-       for {
-               cc := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64InvertFlags {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               cmp := v_2.Args[0]
-               v.reset(OpARM64CSEL)
-               v.Aux = arm64Invert(cc.(Op))
-               v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(cmp)
-               return true
-       }
-       // match: (CSEL {cc} x _ flag)
-       // cond: ccARM64Eval(cc, flag) > 0
-       // result: x
-       for {
-               cc := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               flag := v.Args[2]
-               if !(ccARM64Eval(cc, flag) > 0) {
+               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64RORWconst)
+               v.AuxInt = 32 - c
                v.AddArg(x)
                return true
        }
-       // match: (CSEL {cc} _ y flag)
-       // cond: ccARM64Eval(cc, flag) < 0
-       // result: y
-       for {
-               cc := v.Aux
-               _ = v.Args[2]
-               y := v.Args[1]
-               flag := v.Args[2]
-               if !(ccARM64Eval(cc, flag) < 0) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       // match: (CSEL {cc} x y (CMPWconst [0] bool))
-       // cond: cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil
-       // result: (CSEL {bool.Op} x y flagArg(bool))
+       // match: (ADDshiftLL [c] (SRLconst x [64-c]) x2)
+       // cond:
+       // result: (EXTRconst [64-c] x2 x)
        for {
-               cc := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64CMPWconst {
-                       break
-               }
-               if v_2.AuxInt != 0 {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               bool := v_2.Args[0]
-               if !(cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil) {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               v.reset(OpARM64CSEL)
-               v.Aux = bool.Op
+               x := v_0.Args[0]
+               x2 := v.Args[1]
+               v.reset(OpARM64EXTRconst)
+               v.AuxInt = 64 - c
+               v.AddArg(x2)
                v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(flagArg(bool))
                return true
        }
-       // match: (CSEL {cc} x y (CMPWconst [0] bool))
-       // cond: cc.(Op) == OpARM64Equal && flagArg(bool) != nil
-       // result: (CSEL {arm64Negate(bool.Op)} x y flagArg(bool))
+       // match: (ADDshiftLL <t> [c] (UBFX [bfc] x) x2)
+       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
+       // result: (EXTRWconst [32-c] x2 x)
        for {
-               cc := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64CMPWconst {
-                       break
-               }
-               if v_2.AuxInt != 0 {
+               t := v.Type
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               bool := v_2.Args[0]
-               if !(cc.(Op) == OpARM64Equal && flagArg(bool) != nil) {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               x2 := v.Args[1]
+               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
                        break
                }
-               v.reset(OpARM64CSEL)
-               v.Aux = arm64Negate(bool.Op)
+               v.reset(OpARM64EXTRWconst)
+               v.AuxInt = 32 - c
+               v.AddArg(x2)
                v.AddArg(x)
-               v.AddArg(y)
-               v.AddArg(flagArg(bool))
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64CSEL0_0(v *Value) bool {
-       // match: (CSEL0 {cc} x (InvertFlags cmp))
+func rewriteValueARM64_OpARM64ADDshiftRA_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ADDshiftRA (MOVDconst [c]) x [d])
        // cond:
-       // result: (CSEL0 {arm64Invert(cc.(Op))} x cmp)
+       // result: (ADDconst [c] (SRAconst <x.Type> x [d]))
        for {
-               cc := v.Aux
+               d := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64InvertFlags {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               cmp := v_1.Args[0]
-               v.reset(OpARM64CSEL0)
-               v.Aux = arm64Invert(cc.(Op))
-               v.AddArg(x)
-               v.AddArg(cmp)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (CSEL0 {cc} x flag)
-       // cond: ccARM64Eval(cc, flag) > 0
-       // result: x
+       // match: (ADDshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ADDconst x [c>>uint64(d)])
        for {
-               cc := v.Aux
+               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
-               flag := v.Args[1]
-               if !(ccARM64Eval(cc, flag) > 0) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c >> uint64(d)
                v.AddArg(x)
                return true
        }
-       // match: (CSEL0 {cc} _ flag)
-       // cond: ccARM64Eval(cc, flag) < 0
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64ADDshiftRL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ADDshiftRL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ADDconst [c] (SRLconst <x.Type> x [d]))
        for {
-               cc := v.Aux
+               d := v.AuxInt
                _ = v.Args[1]
-               flag := v.Args[1]
-               if !(ccARM64Eval(cc, flag) < 0) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (CSEL0 {cc} x (CMPWconst [0] bool))
-       // cond: cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil
-       // result: (CSEL0 {bool.Op} x flagArg(bool))
+       // match: (ADDshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ADDconst x [int64(uint64(c)>>uint64(d))])
        for {
-               cc := v.Aux
+               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64CMPWconst {
-                       break
-               }
-               if v_1.AuxInt != 0 {
-                       break
-               }
-               bool := v_1.Args[0]
-               if !(cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil) {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64CSEL0)
-               v.Aux = bool.Op
+               c := v_1.AuxInt
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
                v.AddArg(x)
-               v.AddArg(flagArg(bool))
                return true
        }
-       // match: (CSEL0 {cc} x (CMPWconst [0] bool))
-       // cond: cc.(Op) == OpARM64Equal && flagArg(bool) != nil
-       // result: (CSEL0 {arm64Negate(bool.Op)} x flagArg(bool))
+       // match: (ADDshiftRL [c] (SLLconst x [64-c]) x)
+       // cond:
+       // result: (RORconst [ c] x)
        for {
-               cc := v.Aux
+               c := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64CMPWconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               bool := v_1.Args[0]
-               if !(cc.(Op) == OpARM64Equal && flagArg(bool) != nil) {
+               x := v_0.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               v.reset(OpARM64CSEL0)
-               v.Aux = arm64Negate(bool.Op)
+               v.reset(OpARM64RORconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(flagArg(bool))
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64DIV_0(v *Value) bool {
-       // match: (DIV (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c/d])
+       // match: (ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
+       // cond: c < 32 && t.Size() == 4
+       // result: (RORWconst [c] x)
        for {
+               t := v.Type
+               c := v.AuxInt
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_0.AuxInt
+               if v_0.AuxInt != 32-c {
+                       break
+               }
+               x := v_0.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64MOVWUreg {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c / d
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c < 32 && t.Size() == 4) {
+                       break
+               }
+               v.reset(OpARM64RORWconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64DIVW_0(v *Value) bool {
-       // match: (DIVW (MOVDconst [c]) (MOVDconst [d]))
+func rewriteValueARM64_OpARM64AND_0(v *Value) bool {
+       // match: (AND x (MOVDconst [c]))
        // cond:
-       // result: (MOVDconst [int64(int32(c)/int32(d))])
+       // result: (ANDconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (AND (MOVDconst [c]) x)
+       // cond:
+       // result: (ANDconst [c] x)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
@@ -3496,50 +2556,61 @@ func rewriteValueARM64_OpARM64DIVW_0(v *Value) bool {
                        break
                }
                c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x := v.Args[1]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (AND x x)
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) / int32(d))
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64EON_0(v *Value) bool {
-       // match: (EON x (MOVDconst [c]))
+       // match: (AND x (MVN y))
        // cond:
-       // result: (XORconst [^c] x)
+       // result: (BIC x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64MVN {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64XORconst)
-               v.AuxInt = ^c
+               y := v_1.Args[0]
+               v.reset(OpARM64BIC)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (EON x x)
+       // match: (AND (MVN y) x)
        // cond:
-       // result: (MOVDconst [-1])
+       // result: (BIC x y)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MVN {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64BIC)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (EON x0 x1:(SLLconst [c] y))
+       // match: (AND x0 x1:(SLLconst [c] y))
        // cond: clobberIfDead(x1)
-       // result: (EONshiftLL x0 y [c])
+       // result: (ANDshiftLL x0 y [c])
        for {
                _ = v.Args[1]
                x0 := v.Args[0]
@@ -3552,41 +2623,41 @@ func rewriteValueARM64_OpARM64EON_0(v *Value) bool {
                if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64EONshiftLL)
+               v.reset(OpARM64ANDshiftLL)
                v.AuxInt = c
                v.AddArg(x0)
                v.AddArg(y)
                return true
        }
-       // match: (EON x0 x1:(SRLconst [c] y))
+       // match: (AND x1:(SLLconst [c] y) x0)
        // cond: clobberIfDead(x1)
-       // result: (EONshiftRL x0 y [c])
+       // result: (ANDshiftLL x0 y [c])
        for {
                _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
                c := x1.AuxInt
                y := x1.Args[0]
+               x0 := v.Args[1]
                if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64EONshiftRL)
+               v.reset(OpARM64ANDshiftLL)
                v.AuxInt = c
                v.AddArg(x0)
                v.AddArg(y)
                return true
        }
-       // match: (EON x0 x1:(SRAconst [c] y))
+       // match: (AND x0 x1:(SRLconst [c] y))
        // cond: clobberIfDead(x1)
-       // result: (EONshiftRA x0 y [c])
+       // result: (ANDshiftRL x0 y [c])
        for {
                _ = v.Args[1]
                x0 := v.Args[0]
                x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               if x1.Op != OpARM64SRLconst {
                        break
                }
                c := x1.AuxInt
@@ -3594,4570 +2665,7022 @@ func rewriteValueARM64_OpARM64EON_0(v *Value) bool {
                if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64EONshiftRA)
+               v.reset(OpARM64ANDshiftRL)
                v.AuxInt = c
                v.AddArg(x0)
                v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64EONshiftLL_0(v *Value) bool {
-       // match: (EONshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (XORconst x [^int64(uint64(c)<<uint64(d))])
+       // match: (AND x1:(SRLconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ANDshiftRL x0 y [c])
        for {
-               d := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64XORconst)
-               v.AuxInt = ^int64(uint64(c) << uint64(d))
-               v.AddArg(x)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ANDshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (EONshiftLL x (SLLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
+       // match: (AND x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ANDshiftRA x0 y [c])
        for {
-               d := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               if !(c == d) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
+               v.reset(OpARM64ANDshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64EONshiftRA_0(v *Value) bool {
-       // match: (EONshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (XORconst x [^(c>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               v.reset(OpARM64XORconst)
-               v.AuxInt = ^(c >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (EONshiftRA x (SRAconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
+func rewriteValueARM64_OpARM64AND_10(v *Value) bool {
+       // match: (AND x1:(SRAconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ANDshiftRA x0 y [c])
        for {
-               d := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRAconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               if !(c == d) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
+               v.reset(OpARM64ANDshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64EONshiftRL_0(v *Value) bool {
-       // match: (EONshiftRL x (MOVDconst [c]) [d])
+func rewriteValueARM64_OpARM64ANDconst_0(v *Value) bool {
+       // match: (ANDconst [0] _)
        // cond:
-       // result: (XORconst x [^int64(uint64(c)>>uint64(d))])
+       // result: (MOVDconst [0])
        for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v.AuxInt != 0 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64XORconst)
-               v.AuxInt = ^int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (EONshiftRL x (SRLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(c == d) {
+       // match: (ANDconst [-1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != -1 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64Equal_0(v *Value) bool {
-       // match: (Equal (FlagEQ))
+       // match: (ANDconst [c] (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVDconst [c&d])
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
+               d := v_0.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.AuxInt = c & d
                return true
        }
-       // match: (Equal (FlagLT_ULT))
+       // match: (ANDconst [c] (ANDconst [d] x))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (ANDconst [c&d] x)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & d
+               v.AddArg(x)
                return true
        }
-       // match: (Equal (FlagLT_UGT))
+       // match: (ANDconst [c] (MOVWUreg x))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (ANDconst [c&(1<<32-1)] x)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               if v_0.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<32 - 1)
+               v.AddArg(x)
                return true
        }
-       // match: (Equal (FlagGT_ULT))
+       // match: (ANDconst [c] (MOVHUreg x))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (ANDconst [c&(1<<16-1)] x)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if v_0.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<16 - 1)
+               v.AddArg(x)
                return true
        }
-       // match: (Equal (FlagGT_UGT))
+       // match: (ANDconst [c] (MOVBUreg x))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (ANDconst [c&(1<<8-1)] x)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_0.Op != OpARM64MOVBUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<8 - 1)
+               v.AddArg(x)
                return true
        }
-       // match: (Equal (InvertFlags x))
-       // cond:
-       // result: (Equal x)
+       // match: (ANDconst [ac] (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, ac, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
        for {
+               ac := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
+               sc := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpARM64Equal)
+               if !(isARM64BFMask(sc, ac, sc)) {
+                       break
+               }
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, sc))
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64FADDD_0(v *Value) bool {
-       // match: (FADDD a (FMULD x y))
-       // cond:
-       // result: (FMADDD a x y)
+       // match: (ANDconst [ac] (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, ac, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
        for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMULD {
+               ac := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMADDD)
-               v.AddArg(a)
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, ac, 0)) {
+                       break
+               }
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, 0))
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (FADDD (FMULD x y) a)
+       return false
+}
+func rewriteValueARM64_OpARM64ANDshiftLL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ANDshiftLL (MOVDconst [c]) x [d])
        // cond:
-       // result: (FMADDD a x y)
+       // result: (ANDconst [c] (SLLconst <x.Type> x [d]))
        for {
+               d := v.AuxInt
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FMULD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FMADDD)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (FADDD a (FNMULD x y))
+       // match: (ANDshiftLL x (MOVDconst [c]) [d])
        // cond:
-       // result: (FMSUBD a x y)
+       // result: (ANDconst x [int64(uint64(c)<<uint64(d))])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
-               a := v.Args[0]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNMULD {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMSUBD)
-               v.AddArg(a)
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (FADDD (FNMULD x y) a)
-       // cond:
-       // result: (FMSUBD a x y)
+       // match: (ANDshiftLL x y:(SLLconst x [c]) [d])
+       // cond: c==d
+       // result: y
        for {
+               d := v.AuxInt
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNMULD {
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SLLconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FMSUBD)
-               v.AddArg(a)
-               v.AddArg(x)
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
-       // match: (FADDS a (FMULS x y))
+func rewriteValueARM64_OpARM64ANDshiftRA_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ANDshiftRA (MOVDconst [c]) x [d])
        // cond:
-       // result: (FMADDS a x y)
+       // result: (ANDconst [c] (SRAconst <x.Type> x [d]))
        for {
+               d := v.AuxInt
                _ = v.Args[1]
-               a := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMULS {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMADDS)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (FADDS (FMULS x y) a)
+       // match: (ANDshiftRA x (MOVDconst [c]) [d])
        // cond:
-       // result: (FMADDS a x y)
+       // result: (ANDconst x [c>>uint64(d)])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FMULS {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FMADDS)
-               v.AddArg(a)
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c >> uint64(d)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (FADDS a (FNMULS x y))
-       // cond:
-       // result: (FMSUBS a x y)
+       // match: (ANDshiftRA x y:(SRAconst x [c]) [d])
+       // cond: c==d
+       // result: y
        for {
+               d := v.AuxInt
                _ = v.Args[1]
-               a := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNMULS {
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SRAconst {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMSUBS)
-               v.AddArg(a)
-               v.AddArg(x)
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (FADDS (FNMULS x y) a)
+       return false
+}
+func rewriteValueARM64_OpARM64ANDshiftRL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ANDshiftRL (MOVDconst [c]) x [d])
        // cond:
-       // result: (FMSUBS a x y)
+       // result: (ANDconst [c] (SRLconst <x.Type> x [d]))
        for {
+               d := v.AuxInt
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNMULS {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FMSUBS)
-               v.AddArg(a)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ANDshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ANDconst x [int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
                v.AddArg(x)
+               return true
+       }
+       // match: (ANDshiftRL x y:(SRLconst x [c]) [d])
+       // cond: c==d
+       // result: y
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SRLconst {
+                       break
+               }
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (FMOVDfpgp <t> (Arg [off] {sym}))
+func rewriteValueARM64_OpARM64BIC_0(v *Value) bool {
+       // match: (BIC x (MOVDconst [c]))
        // cond:
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       // result: (ANDconst [^c] x)
        for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpArg {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off := v_0.AuxInt
-               sym := v_0.Aux
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = ^c
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64FMOVDgpfp_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (FMOVDgpfp <t> (Arg [off] {sym}))
+       // match: (BIC x x)
        // cond:
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       // result: (MOVDconst [0])
        for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpArg {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               off := v_0.AuxInt
-               sym := v_0.Aux
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64FMOVDload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVDload [off1+off2] {sym} ptr mem)
+       // match: (BIC x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (BICshiftLL x0 y [c])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64FMOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64BICshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (FMOVDload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (FMOVDloadidx ptr idx mem)
+       // match: (BIC x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (BICshiftRL x0 y [c])
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64FMOVDloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64BICshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (BIC x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (BICshiftRA x0 y [c])
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64FMOVDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64BICshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FMOVDloadidx_0(v *Value) bool {
-       // match: (FMOVDloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64BICshiftLL_0(v *Value) bool {
+       // match: (BICshiftLL x (MOVDconst [c]) [d])
        // cond:
-       // result: (FMOVDload [c] ptr mem)
+       // result: (ANDconst x [^int64(uint64(c)<<uint64(d))])
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVDload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = ^int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
        }
-       // match: (FMOVDloadidx (MOVDconst [c]) ptr mem)
-       // cond:
-       // result: (FMOVDload [c] ptr mem)
+       // match: (BICshiftLL x (SLLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVDload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FMOVDstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (FMOVDstore ptr (FMOVDgpfp val) mem)
+func rewriteValueARM64_OpARM64BICshiftRA_0(v *Value) bool {
+       // match: (BICshiftRA x (MOVDconst [c]) [d])
        // cond:
-       // result: (MOVDstore ptr val mem)
+       // result: (ANDconst x [^(c>>uint64(d))])
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMOVDgpfp {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               val := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstore)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = ^(c >> uint64(d))
+               v.AddArg(x)
                return true
        }
-       // match: (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVDstore [off1+off2] {sym} ptr val mem)
+       // match: (BICshiftRA x (SRAconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64FMOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (FMOVDstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (FMOVDstoreidx ptr idx val mem)
+       return false
+}
+func rewriteValueARM64_OpARM64BICshiftRL_0(v *Value) bool {
+       // match: (BICshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ANDconst x [^int64(uint64(c)>>uint64(d))])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64FMOVDstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = ^int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
                return true
        }
-       // match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (BICshiftRL x (SRLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64FMOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FMOVDstoreidx_0(v *Value) bool {
-       // match: (FMOVDstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64CMN_0(v *Value) bool {
+       // match: (CMN x (MOVDconst [c]))
        // cond:
-       // result: (FMOVDstore [c] ptr val mem)
+       // result: (CMNconst [c] x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64FMOVDstore)
+               v.reset(OpARM64CMNconst)
                v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.AddArg(x)
                return true
        }
-       // match: (FMOVDstoreidx (MOVDconst [c]) idx val mem)
+       // match: (CMN (MOVDconst [c]) x)
        // cond:
-       // result: (FMOVDstore [c] idx val mem)
+       // result: (CMNconst [c] x)
        for {
-               _ = v.Args[3]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64FMOVDstore)
+               x := v.Args[1]
+               v.reset(OpARM64CMNconst)
                v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FMOVSload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVSload [off1+off2] {sym} ptr mem)
+func rewriteValueARM64_OpARM64CMNW_0(v *Value) bool {
+       // match: (CMNW x (MOVDconst [c]))
+       // cond:
+       // result: (CMNWconst [c] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64FMOVSload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               v.reset(OpARM64CMNWconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (FMOVSload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (FMOVSloadidx ptr idx mem)
+       // match: (CMNW (MOVDconst [c]) x)
+       // cond:
+       // result: (CMNWconst [c] x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64FMOVSloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64CMNWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
        }
-       // match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64CMNWconst_0(v *Value) bool {
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)==int32(-y)
+       // result: (FlagEQ)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x := v_0.AuxInt
+               if !(int32(x) == int32(-y)) {
                        break
                }
-               v.reset(OpARM64FMOVSload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64FlagEQ)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64FMOVSloadidx_0(v *Value) bool {
-       // match: (FMOVSloadidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (FMOVSload [c] ptr mem)
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)<int32(-y) && uint32(x)<uint32(-y)
+       // result: (FlagLT_ULT)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVSload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v_0.AuxInt
+               if !(int32(x) < int32(-y) && uint32(x) < uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (FMOVSloadidx (MOVDconst [c]) ptr mem)
-       // cond:
-       // result: (FMOVSload [c] ptr mem)
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)<int32(-y) && uint32(x)>uint32(-y)
+       // result: (FlagLT_UGT)
        for {
-               _ = v.Args[2]
+               y := v.AuxInt
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVSload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v_0.AuxInt
+               if !(int32(x) < int32(-y) && uint32(x) > uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)>int32(-y) && uint32(x)<uint32(-y)
+       // result: (FlagGT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) > int32(-y) && uint32(x) < uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_ULT)
+               return true
+       }
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)>int32(-y) && uint32(x)>uint32(-y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) > int32(-y) && uint32(x) > uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FMOVSstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVSstore [off1+off2] {sym} ptr val mem)
+func rewriteValueARM64_OpARM64CMNconst_0(v *Value) bool {
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)==int64(-y)
+       // result: (FlagEQ)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x := v_0.AuxInt
+               if !(int64(x) == int64(-y)) {
                        break
                }
-               v.reset(OpARM64FMOVSstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64FlagEQ)
                return true
        }
-       // match: (FMOVSstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (FMOVSstoreidx ptr idx val mem)
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)<int64(-y) && uint64(x)<uint64(-y)
+       // result: (FlagLT_ULT)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               x := v_0.AuxInt
+               if !(int64(x) < int64(-y) && uint64(x) < uint64(-y)) {
                        break
                }
-               v.reset(OpARM64FMOVSstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)<int64(-y) && uint64(x)>uint64(-y)
+       // result: (FlagLT_UGT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               x := v_0.AuxInt
+               if !(int64(x) < int64(-y) && uint64(x) > uint64(-y)) {
                        break
                }
-               v.reset(OpARM64FMOVSstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)>int64(-y) && uint64(x)<uint64(-y)
+       // result: (FlagGT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x) > int64(-y) && uint64(x) < uint64(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_ULT)
+               return true
+       }
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)>int64(-y) && uint64(x)>uint64(-y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x) > int64(-y) && uint64(x) > uint64(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FMOVSstoreidx_0(v *Value) bool {
-       // match: (FMOVSstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64CMP_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMP x (MOVDconst [c]))
        // cond:
-       // result: (FMOVSstore [c] ptr val mem)
+       // result: (CMPconst [c] x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64FMOVSstore)
+               v.reset(OpARM64CMPconst)
                v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.AddArg(x)
                return true
        }
-       // match: (FMOVSstoreidx (MOVDconst [c]) idx val mem)
+       // match: (CMP (MOVDconst [c]) x)
        // cond:
-       // result: (FMOVSstore [c] idx val mem)
+       // result: (InvertFlags (CMPconst [c] x))
        for {
-               _ = v.Args[3]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64FMOVSstore)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64FMULD_0(v *Value) bool {
-       // match: (FMULD (FNEGD x) y)
-       // cond:
-       // result: (FNMULD x y)
+       // match: (CMP x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (CMPshiftLL x0 y [c])
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNEGD {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64FNMULD)
-               v.AddArg(x)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64CMPshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
                v.AddArg(y)
                return true
        }
-       // match: (FMULD y (FNEGD x))
-       // cond:
-       // result: (FNMULD x y)
+       // match: (CMP x0:(SLLconst [c] y) x1)
+       // cond: clobberIfDead(x0)
+       // result: (InvertFlags (CMPshiftLL x1 y [c]))
        for {
                _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNEGD {
+               x0 := v.Args[0]
+               if x0.Op != OpARM64SLLconst {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64FNMULD)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := x0.AuxInt
+               y := x0.Args[0]
+               x1 := v.Args[1]
+               if !(clobberIfDead(x0)) {
+                       break
+               }
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPshiftLL, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x1)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64FMULS_0(v *Value) bool {
-       // match: (FMULS (FNEGS x) y)
-       // cond:
-       // result: (FNMULS x y)
+       // match: (CMP x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (CMPshiftRL x0 y [c])
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNEGS {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64FNMULS)
-               v.AddArg(x)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64CMPshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
                v.AddArg(y)
                return true
        }
-       // match: (FMULS y (FNEGS x))
-       // cond:
-       // result: (FNMULS x y)
+       // match: (CMP x0:(SRLconst [c] y) x1)
+       // cond: clobberIfDead(x0)
+       // result: (InvertFlags (CMPshiftRL x1 y [c]))
        for {
                _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNEGS {
+               x0 := v.Args[0]
+               if x0.Op != OpARM64SRLconst {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64FNMULS)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64FNEGD_0(v *Value) bool {
-       // match: (FNEGD (FMULD x y))
-       // cond:
-       // result: (FNMULD x y)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FMULD {
+               c := x0.AuxInt
+               y := x0.Args[0]
+               x1 := v.Args[1]
+               if !(clobberIfDead(x0)) {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64FNMULD)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPshiftRL, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x1)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (FNEGD (FNMULD x y))
-       // cond:
-       // result: (FMULD x y)
+       // match: (CMP x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (CMPshiftRA x0 y [c])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNMULD {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64FMULD)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64FNEGS_0(v *Value) bool {
-       // match: (FNEGS (FMULS x y))
-       // cond:
-       // result: (FNMULS x y)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FMULS {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64FNMULS)
-               v.AddArg(x)
+               v.reset(OpARM64CMPshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
                v.AddArg(y)
                return true
        }
-       // match: (FNEGS (FNMULS x y))
-       // cond:
-       // result: (FMULS x y)
+       // match: (CMP x0:(SRAconst [c] y) x1)
+       // cond: clobberIfDead(x0)
+       // result: (InvertFlags (CMPshiftRA x1 y [c]))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNMULS {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               if x0.Op != OpARM64SRAconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64FMULS)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := x0.AuxInt
+               y := x0.Args[0]
+               x1 := v.Args[1]
+               if !(clobberIfDead(x0)) {
+                       break
+               }
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPshiftRA, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x1)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FNMULD_0(v *Value) bool {
-       // match: (FNMULD (FNEGD x) y)
+func rewriteValueARM64_OpARM64CMPW_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPW x (MOVDconst [c]))
        // cond:
-       // result: (FMULD x y)
+       // result: (CMPWconst [int64(int32(c))] x)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNEGD {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64FMULD)
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPWconst)
+               v.AuxInt = int64(int32(c))
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (FNMULD y (FNEGD x))
+       // match: (CMPW (MOVDconst [c]) x)
        // cond:
-       // result: (FMULD x y)
+       // result: (InvertFlags (CMPWconst [int64(int32(c))] x))
        for {
                _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNEGD {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64FMULD)
-               v.AddArg(x)
-               v.AddArg(y)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPWconst, types.TypeFlags)
+               v0.AuxInt = int64(int32(c))
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64FNMULS_0(v *Value) bool {
-       // match: (FNMULS (FNEGS x) y)
-       // cond:
-       // result: (FMULS x y)
+func rewriteValueARM64_OpARM64CMPWconst_0(v *Value) bool {
+       // match: (CMPWconst (MOVDconst [x]) [y])
+       // cond: int32(x)==int32(y)
+       // result: (FlagEQ)
        for {
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNEGS {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64FMULS)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (FNMULS y (FNEGS x))
-       // cond:
-       // result: (FMULS x y)
-       for {
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNEGS {
+               x := v_0.AuxInt
+               if !(int32(x) == int32(y)) {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64FMULS)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpARM64FlagEQ)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64FSUBD_0(v *Value) bool {
-       // match: (FSUBD a (FMULD x y))
-       // cond:
-       // result: (FMSUBD a x y)
+       // match: (CMPWconst (MOVDconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
+       // result: (FlagLT_ULT)
        for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMULD {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMSUBD)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               x := v_0.AuxInt
+               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (FSUBD (FMULD x y) a)
-       // cond:
-       // result: (FNMSUBD a x y)
+       // match: (CMPWconst (MOVDconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
+       // result: (FlagLT_UGT)
        for {
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FMULD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FNMSUBD)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (FSUBD a (FNMULD x y))
-       // cond:
-       // result: (FMADDD a x y)
-       for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNMULD {
+               x := v_0.AuxInt
+               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMADDD)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpARM64FlagLT_UGT)
                return true
        }
-       // match: (FSUBD (FNMULD x y) a)
-       // cond:
-       // result: (FNMADDD a x y)
+       // match: (CMPWconst (MOVDconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
+       // result: (FlagGT_ULT)
        for {
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNMULD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FNMADDD)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64FSUBS_0(v *Value) bool {
-       // match: (FSUBS a (FMULS x y))
-       // cond:
-       // result: (FMSUBS a x y)
-       for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMULS {
+               x := v_0.AuxInt
+               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMSUBS)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpARM64FlagGT_ULT)
                return true
        }
-       // match: (FSUBS (FMULS x y) a)
-       // cond:
-       // result: (FNMSUBS a x y)
+       // match: (CMPWconst (MOVDconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
+       // result: (FlagGT_UGT)
        for {
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FMULS {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FNMSUBS)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               x := v_0.AuxInt
+               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
                return true
        }
-       // match: (FSUBS a (FNMULS x y))
-       // cond:
-       // result: (FMADDS a x y)
+       // match: (CMPWconst (MOVBUreg _) [c])
+       // cond: 0xff < int32(c)
+       // result: (FlagLT_ULT)
        for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64FNMULS {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVBUreg {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               y := v_1.Args[1]
-               v.reset(OpARM64FMADDS)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               if !(0xff < int32(c)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (FSUBS (FNMULS x y) a)
-       // cond:
-       // result: (FNMADDS a x y)
+       // match: (CMPWconst (MOVHUreg _) [c])
+       // cond: 0xffff < int32(c)
+       // result: (FlagLT_ULT)
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FNMULS {
+               if v_0.Op != OpARM64MOVHUreg {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               a := v.Args[1]
-               v.reset(OpARM64FNMADDS)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
+               if !(0xffff < int32(c)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64GreaterEqual_0(v *Value) bool {
-       // match: (GreaterEqual (FlagEQ))
-       // cond:
-       // result: (MOVDconst [1])
+func rewriteValueARM64_OpARM64CMPconst_0(v *Value) bool {
+       // match: (CMPconst (MOVDconst [x]) [y])
+       // cond: x==y
+       // result: (FlagEQ)
        for {
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (GreaterEqual (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               x := v_0.AuxInt
+               if !(x == y) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64FlagEQ)
                return true
        }
-       // match: (GreaterEqual (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (CMPconst (MOVDconst [x]) [y])
+       // cond: x<y && uint64(x)<uint64(y)
+       // result: (FlagLT_ULT)
        for {
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v_0.AuxInt
+               if !(x < y && uint64(x) < uint64(y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (GreaterEqual (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CMPconst (MOVDconst [x]) [y])
+       // cond: x<y && uint64(x)>uint64(y)
+       // result: (FlagLT_UGT)
        for {
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               x := v_0.AuxInt
+               if !(x < y && uint64(x) > uint64(y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_UGT)
                return true
        }
-       // match: (GreaterEqual (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CMPconst (MOVDconst [x]) [y])
+       // cond: x>y && uint64(x)<uint64(y)
+       // result: (FlagGT_ULT)
        for {
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (GreaterEqual (InvertFlags x))
-       // cond:
-       // result: (LessEqual x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               x := v_0.AuxInt
+               if !(x > y && uint64(x) < uint64(y)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64LessEqual)
-               v.AddArg(x)
+               v.reset(OpARM64FlagGT_ULT)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64GreaterEqualU_0(v *Value) bool {
-       // match: (GreaterEqualU (FlagEQ))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CMPconst (MOVDconst [x]) [y])
+       // cond: x>y && uint64(x)>uint64(y)
+       // result: (FlagGT_UGT)
        for {
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               x := v_0.AuxInt
+               if !(x > y && uint64(x) > uint64(y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
                return true
        }
-       // match: (GreaterEqualU (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (CMPconst (MOVBUreg _) [c])
+       // cond: 0xff < c
+       // result: (FlagLT_ULT)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               if v_0.Op != OpARM64MOVBUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               if !(0xff < c) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (GreaterEqualU (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CMPconst (MOVHUreg _) [c])
+       // cond: 0xffff < c
+       // result: (FlagLT_ULT)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               if v_0.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               if !(0xffff < c) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (GreaterEqualU (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (CMPconst (MOVWUreg _) [c])
+       // cond: 0xffffffff < c
+       // result: (FlagLT_ULT)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if v_0.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               if !(0xffffffff < c) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (GreaterEqualU (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CMPconst (ANDconst _ [m]) [n])
+       // cond: 0 <= m && m < n
+       // result: (FlagLT_ULT)
        for {
+               n := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               m := v_0.AuxInt
+               if !(0 <= m && m < n) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
-       // match: (GreaterEqualU (InvertFlags x))
-       // cond:
-       // result: (LessEqualU x)
+       // match: (CMPconst (SRLconst _ [c]) [n])
+       // cond: 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n)
+       // result: (FlagLT_ULT)
        for {
+               n := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64LessEqualU)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               if !(0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64GreaterThan_0(v *Value) bool {
-       // match: (GreaterThan (FlagEQ))
+func rewriteValueARM64_OpARM64CMPshiftLL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPshiftLL (MOVDconst [c]) x [d])
        // cond:
-       // result: (MOVDconst [0])
+       // result: (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
        for {
+               d := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v0.AuxInt = c
+               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v1.AuxInt = d
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (GreaterThan (FlagLT_ULT))
+       // match: (CMPshiftLL x (MOVDconst [c]) [d])
        // cond:
-       // result: (MOVDconst [0])
+       // result: (CMPconst x [int64(uint64(c)<<uint64(d))])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
                return true
        }
-       // match: (GreaterThan (FlagLT_UGT))
+       return false
+}
+func rewriteValueARM64_OpARM64CMPshiftRA_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPshiftRA (MOVDconst [c]) x [d])
        // cond:
-       // result: (MOVDconst [0])
+       // result: (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
        for {
+               d := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v0.AuxInt = c
+               v1 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
+               v1.AuxInt = d
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (GreaterThan (FlagGT_ULT))
+       // match: (CMPshiftRA x (MOVDconst [c]) [d])
        // cond:
-       // result: (MOVDconst [1])
+       // result: (CMPconst x [c>>uint64(d)])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPconst)
+               v.AuxInt = c >> uint64(d)
+               v.AddArg(x)
                return true
        }
-       // match: (GreaterThan (FlagGT_UGT))
+       return false
+}
+func rewriteValueARM64_OpARM64CMPshiftRL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPshiftRL (MOVDconst [c]) x [d])
        // cond:
-       // result: (MOVDconst [1])
+       // result: (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
        for {
+               d := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpARM64CMPconst, types.TypeFlags)
+               v0.AuxInt = c
+               v1 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
+               v1.AuxInt = d
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (GreaterThan (InvertFlags x))
+       // match: (CMPshiftRL x (MOVDconst [c]) [d])
        // cond:
-       // result: (LessThan x)
+       // result: (CMPconst x [int64(uint64(c)>>uint64(d))])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64LessThan)
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64GreaterThanU_0(v *Value) bool {
-       // match: (GreaterThanU (FlagEQ))
+func rewriteValueARM64_OpARM64CSEL_0(v *Value) bool {
+       // match: (CSEL {cc} x (MOVDconst [0]) flag)
        // cond:
-       // result: (MOVDconst [0])
+       // result: (CSEL0 {cc} x flag)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (GreaterThanU (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               cc := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (GreaterThanU (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               flag := v.Args[2]
+               v.reset(OpARM64CSEL0)
+               v.Aux = cc
+               v.AddArg(x)
+               v.AddArg(flag)
                return true
        }
-       // match: (GreaterThanU (FlagGT_ULT))
+       // match: (CSEL {cc} (MOVDconst [0]) y flag)
        // cond:
-       // result: (MOVDconst [0])
+       // result: (CSEL0 {arm64Negate(cc.(Op))} y flag)
        for {
+               cc := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (GreaterThanU (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_0.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               y := v.Args[1]
+               flag := v.Args[2]
+               v.reset(OpARM64CSEL0)
+               v.Aux = arm64Negate(cc.(Op))
+               v.AddArg(y)
+               v.AddArg(flag)
                return true
        }
-       // match: (GreaterThanU (InvertFlags x))
+       // match: (CSEL {cc} x y (InvertFlags cmp))
        // cond:
-       // result: (LessThanU x)
+       // result: (CSEL {arm64Invert(cc.(Op))} x y cmp)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               cc := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64InvertFlags {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64LessThanU)
+               cmp := v_2.Args[0]
+               v.reset(OpARM64CSEL)
+               v.Aux = arm64Invert(cc.(Op))
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cmp)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64LessEqual_0(v *Value) bool {
-       // match: (LessEqual (FlagEQ))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CSEL {cc} x _ flag)
+       // cond: ccARM64Eval(cc, flag) > 0
+       // result: x
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               cc := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               flag := v.Args[2]
+               if !(ccARM64Eval(cc, flag) > 0) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (LessEqual (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CSEL {cc} _ y flag)
+       // cond: ccARM64Eval(cc, flag) < 0
+       // result: y
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               cc := v.Aux
+               _ = v.Args[2]
+               y := v.Args[1]
+               flag := v.Args[2]
+               if !(ccARM64Eval(cc, flag) < 0) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (LessEqual (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CSEL {cc} x y (CMPWconst [0] bool))
+       // cond: cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil
+       // result: (CSEL {bool.Op} x y flagArg(bool))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               cc := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64CMPWconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (LessEqual (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (LessEqual (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               bool := v_2.Args[0]
+               if !(cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64CSEL)
+               v.Aux = bool.Op
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flagArg(bool))
                return true
        }
-       // match: (LessEqual (InvertFlags x))
-       // cond:
-       // result: (GreaterEqual x)
+       // match: (CSEL {cc} x y (CMPWconst [0] bool))
+       // cond: cc.(Op) == OpARM64Equal && flagArg(bool) != nil
+       // result: (CSEL {arm64Negate(bool.Op)} x y flagArg(bool))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               cc := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64CMPWconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64GreaterEqual)
+               if v_2.AuxInt != 0 {
+                       break
+               }
+               bool := v_2.Args[0]
+               if !(cc.(Op) == OpARM64Equal && flagArg(bool) != nil) {
+                       break
+               }
+               v.reset(OpARM64CSEL)
+               v.Aux = arm64Negate(bool.Op)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(flagArg(bool))
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64LessEqualU_0(v *Value) bool {
-       // match: (LessEqualU (FlagEQ))
+func rewriteValueARM64_OpARM64CSEL0_0(v *Value) bool {
+       // match: (CSEL0 {cc} x (InvertFlags cmp))
        // cond:
-       // result: (MOVDconst [1])
+       // result: (CSEL0 {arm64Invert(cc.(Op))} x cmp)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               cc := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64InvertFlags {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               cmp := v_1.Args[0]
+               v.reset(OpARM64CSEL0)
+               v.Aux = arm64Invert(cc.(Op))
+               v.AddArg(x)
+               v.AddArg(cmp)
                return true
        }
-       // match: (LessEqualU (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CSEL0 {cc} x flag)
+       // cond: ccARM64Eval(cc, flag) > 0
+       // result: x
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               cc := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               flag := v.Args[1]
+               if !(ccARM64Eval(cc, flag) > 0) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (LessEqualU (FlagLT_UGT))
-       // cond:
+       // match: (CSEL0 {cc} _ flag)
+       // cond: ccARM64Eval(cc, flag) < 0
        // result: (MOVDconst [0])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               cc := v.Aux
+               _ = v.Args[1]
+               flag := v.Args[1]
+               if !(ccARM64Eval(cc, flag) < 0) {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
                return true
        }
-       // match: (LessEqualU (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
+       // match: (CSEL0 {cc} x (CMPWconst [0] bool))
+       // cond: cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil
+       // result: (CSEL0 {bool.Op} x flagArg(bool))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               cc := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CMPWconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (LessEqualU (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (LessEqualU (InvertFlags x))
-       // cond:
-       // result: (GreaterEqualU x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               bool := v_1.Args[0]
+               if !(cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64GreaterEqualU)
+               v.reset(OpARM64CSEL0)
+               v.Aux = bool.Op
                v.AddArg(x)
+               v.AddArg(flagArg(bool))
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64LessThan_0(v *Value) bool {
-       // match: (LessThan (FlagEQ))
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (CSEL0 {cc} x (CMPWconst [0] bool))
+       // cond: cc.(Op) == OpARM64Equal && flagArg(bool) != nil
+       // result: (CSEL0 {arm64Negate(bool.Op)} x flagArg(bool))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               cc := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CMPWconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (LessThan (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (LessThan (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               bool := v_1.Args[0]
+               if !(cc.(Op) == OpARM64Equal && flagArg(bool) != nil) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.reset(OpARM64CSEL0)
+               v.Aux = arm64Negate(bool.Op)
+               v.AddArg(x)
+               v.AddArg(flagArg(bool))
                return true
        }
-       // match: (LessThan (FlagGT_ULT))
+       return false
+}
+func rewriteValueARM64_OpARM64DIV_0(v *Value) bool {
+       // match: (DIV (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (MOVDconst [c/d])
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (LessThan (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
+               d := v_1.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.AuxInt = c / d
                return true
        }
-       // match: (LessThan (InvertFlags x))
+       return false
+}
+func rewriteValueARM64_OpARM64DIVW_0(v *Value) bool {
+       // match: (DIVW (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (GreaterThan x)
+       // result: (MOVDconst [int64(int32(c)/int32(d))])
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64GreaterThan)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) / int32(d))
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64LessThanU_0(v *Value) bool {
-       // match: (LessThanU (FlagEQ))
+func rewriteValueARM64_OpARM64EON_0(v *Value) bool {
+       // match: (EON x (MOVDconst [c]))
        // cond:
-       // result: (MOVDconst [0])
+       // result: (XORconst [^c] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = ^c
+               v.AddArg(x)
                return true
        }
-       // match: (LessThanU (FlagLT_ULT))
+       // match: (EON x x)
        // cond:
-       // result: (MOVDconst [1])
+       // result: (MOVDconst [-1])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.AuxInt = -1
                return true
        }
-       // match: (LessThanU (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (EON x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (EONshiftLL x0 y [c])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (LessThanU (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.reset(OpARM64EONshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (LessThanU (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (EON x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (EONshiftRL x0 y [c])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64EONshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (LessThanU (InvertFlags x))
-       // cond:
-       // result: (GreaterThanU x)
+       // match: (EON x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (EONshiftRA x0 y [c])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64GreaterThanU)
-               v.AddArg(x)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64EONshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MNEG x (MOVDconst [-1]))
+func rewriteValueARM64_OpARM64EONshiftLL_0(v *Value) bool {
+       // match: (EONshiftLL x (MOVDconst [c]) [d])
        // cond:
-       // result: x
+       // result: (XORconst x [^int64(uint64(c)<<uint64(d))])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != -1 {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = ^int64(uint64(c) << uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (MNEG (MOVDconst [-1]) x)
-       // cond:
-       // result: x
+       // match: (EONshiftLL x (SLLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if v_0.AuxInt != -1 {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
                return true
        }
-       // match: (MNEG _ (MOVDconst [0]))
+       return false
+}
+func rewriteValueARM64_OpARM64EONshiftRA_0(v *Value) bool {
+       // match: (EONshiftRA x (MOVDconst [c]) [d])
        // cond:
-       // result: (MOVDconst [0])
+       // result: (XORconst x [^(c>>uint64(d))])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = ^(c >> uint64(d))
+               v.AddArg(x)
                return true
        }
-       // match: (MNEG (MOVDconst [0]) _)
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (EONshiftRA x (SRAconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
                        break
                }
-               if v_0.AuxInt != 0 {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
                        break
                }
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.AuxInt = -1
                return true
        }
-       // match: (MNEG x (MOVDconst [1]))
+       return false
+}
+func rewriteValueARM64_OpARM64EONshiftRL_0(v *Value) bool {
+       // match: (EONshiftRL x (MOVDconst [c]) [d])
        // cond:
-       // result: (NEG x)
+       // result: (XORconst x [^int64(uint64(c)>>uint64(d))])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 1 {
-                       break
-               }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MNEG (MOVDconst [1]) x)
-       // cond:
-       // result: (NEG x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               if v_0.AuxInt != 1 {
-                       break
-               }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = ^int64(uint64(c) >> uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       // match: (EONshiftRL x (SRLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
        for {
+               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       return false
+}
+func rewriteValueARM64_OpARM64Equal_0(v *Value) bool {
+       // match: (Equal (FlagEQ))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (Equal (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (Equal (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Equal (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (Equal (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (Equal (InvertFlags x))
+       // cond:
+       // result: (Equal x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               x := v_0.Args[0]
+               v.reset(OpARM64Equal)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+func rewriteValueARM64_OpARM64FADDD_0(v *Value) bool {
+       // match: (FADDD a (FMULD x y))
+       // cond:
+       // result: (FMADDD a x y)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if v_1.Op != OpARM64FMULD {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMADDD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       // match: (FADDD (FMULD x y) a)
+       // cond:
+       // result: (FMADDD a x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if v_0.Op != OpARM64FMULD {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FMADDD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       // match: (FADDD a (FNMULD x y))
+       // cond:
+       // result: (FMSUBD a x y)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               if v_1.Op != OpARM64FNMULD {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMSUBD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       // match: (FADDD (FNMULD x y) a)
+       // cond:
+       // result: (FMSUBD a x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               if v_0.Op != OpARM64FNMULD {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FMSUBD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       return false
+}
+func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
+       // match: (FADDS a (FMULS x y))
+       // cond:
+       // result: (FMADDS a x y)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               if v_1.Op != OpARM64FMULS {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMADDS)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       // match: (FADDS (FMULS x y) a)
+       // cond:
+       // result: (FMADDS a x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               if v_0.Op != OpARM64FMULS {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FMADDS)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       // match: (FADDS a (FNMULS x y))
+       // cond:
+       // result: (FMSUBS a x y)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               if v_1.Op != OpARM64FNMULS {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMSUBS)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       // match: (FADDS (FNMULS x y) a)
+       // cond:
+       // result: (FMSUBS a x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FNMULS {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FMSUBS)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (FMOVDfpgp <t> (Arg [off] {sym}))
+       // cond:
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
+       for {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpArg {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
+               off := v_0.AuxInt
+               sym := v_0.Aux
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
                return true
        }
-       // match: (MNEG x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVDgpfp_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (FMOVDgpfp <t> (Arg [off] {sym}))
+       // cond:
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpArg {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
+               off := v_0.AuxInt
+               sym := v_0.Aux
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
                return true
        }
-       // match: (MNEG (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVDload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVDload [off1+off2] {sym} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               v.reset(OpARM64FMOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MNEG_20(v *Value) bool {
-       // match: (MNEG (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [-c*d])
+       // match: (FMOVDload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVDloadidx ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c * d
+               v.reset(OpARM64FMOVDloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEG (MOVDconst [d]) (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [-c*d])
+       // match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               d := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c * d
+               v.reset(OpARM64FMOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: x
+func rewriteValueARM64_OpARM64FMOVDloadidx_0(v *Value) bool {
+       // match: (FMOVDloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (FMOVDload [c] ptr mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(int32(c) == -1) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVDload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: x
+       // match: (FMOVDloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (FMOVDload [c] ptr mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == -1) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVDload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVDstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (FMOVDstore ptr (FMOVDgpfp val) mem)
+       // cond:
+       // result: (MOVDstore ptr val mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(int32(c) == 0) {
+               if v_1.Op != OpARM64FMOVDgpfp {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstore)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVDstore [off1+off2] {sym} ptr val mem)
        for {
-               _ = v.Args[1]
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64FMOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: (NEG x)
+       // match: (FMOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVDstoreidx ptr idx val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64FMOVDstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: (NEG x)
+       // match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
-               _ = v.Args[1]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64FMOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVDstoreidx_0(v *Value) bool {
+       // match: (FMOVDstoreidx ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (FMOVDstore [c] ptr val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[3]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
-                       break
-               }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64FMOVDstore)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (FMOVDstoreidx (MOVDconst [c]) idx val mem)
+       // cond:
+       // result: (FMOVDstore [c] idx val mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64FMOVDstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVSload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVSload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64FMOVSload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (FMOVSload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVSloadidx ptr idx mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64FMOVSloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       // match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64FMOVSload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+func rewriteValueARM64_OpARM64FMOVSloadidx_0(v *Value) bool {
+       // match: (FMOVSloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (FMOVSload [c] ptr mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
-                       break
-               }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVSload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       // match: (FMOVSloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (FMOVSload [c] ptr mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
-                       break
-               }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = log2(c + 1)
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVSload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVSstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVSstore [off1+off2] {sym} ptr val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64FMOVSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       // match: (FMOVSstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVSstoreidx ptr idx val mem)
        for {
-               _ = v.Args[1]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpARM64FMOVSstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       // match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
-       for {
-               _ = v.Args[1]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 5)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 2
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               v.reset(OpARM64FMOVSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVSstoreidx_0(v *Value) bool {
+       // match: (FMOVSstoreidx ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (FMOVSstore [c] ptr val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v.Args[3]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64FMOVSstore)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       // match: (FMOVSstoreidx (MOVDconst [c]) idx val mem)
+       // cond:
+       // result: (FMOVSstore [c] idx val mem)
        for {
-               _ = v.Args[1]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
-                       break
-               }
-               v.reset(OpARM64SLLconst)
-               v.Type = x.Type
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64FMOVSstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MNEGW x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       return false
+}
+func rewriteValueARM64_OpARM64FMULD_0(v *Value) bool {
+       // match: (FMULD (FNEGD x) y)
+       // cond:
+       // result: (FNMULD x y)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FNEGD {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64FNMULD)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEGW (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       // match: (FMULD y (FNEGD x))
+       // cond:
+       // result: (FNMULD x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FNEGD {
                        break
                }
-               v.reset(OpARM64NEG)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = log2(c / 9)
-               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v1.AuxInt = 3
-               v1.AddArg(x)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               x := v_1.Args[0]
+               v.reset(OpARM64FNMULD)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MNEGW_20(v *Value) bool {
-       // match: (MNEGW (MOVDconst [c]) (MOVDconst [d]))
+func rewriteValueARM64_OpARM64FMULS_0(v *Value) bool {
+       // match: (FMULS (FNEGS x) y)
        // cond:
-       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       // result: (FNMULS x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FNEGS {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -int64(int32(c) * int32(d))
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64FNMULS)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MNEGW (MOVDconst [d]) (MOVDconst [c]))
+       // match: (FMULS y (FNEGS x))
        // cond:
-       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       // result: (FNMULS x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               d := v_0.AuxInt
+               y := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64FNEGS {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -int64(int32(c) * int32(d))
+               x := v_1.Args[0]
+               v.reset(OpARM64FNMULS)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOD_0(v *Value) bool {
-       // match: (MOD (MOVDconst [c]) (MOVDconst [d]))
+func rewriteValueARM64_OpARM64FNEGD_0(v *Value) bool {
+       // match: (FNEGD (FMULD x y))
        // cond:
-       // result: (MOVDconst [c%d])
+       // result: (FNMULD x y)
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FMULD {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64FNMULD)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (FNEGD (FNMULD x y))
+       // cond:
+       // result: (FMULD x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FNMULD {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c % d
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64FMULD)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MODW_0(v *Value) bool {
-       // match: (MODW (MOVDconst [c]) (MOVDconst [d]))
+func rewriteValueARM64_OpARM64FNEGS_0(v *Value) bool {
+       // match: (FNEGS (FMULS x y))
        // cond:
-       // result: (MOVDconst [int64(int32(c)%int32(d))])
+       // result: (FNMULS x y)
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FMULS {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64FNMULS)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (FNEGS (FNMULS x y))
+       // cond:
+       // result: (FMULS x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FNMULS {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) % int32(d))
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64FMULS)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBUload [off1+off2] {sym} ptr mem)
+func rewriteValueARM64_OpARM64FNMULD_0(v *Value) bool {
+       // match: (FNMULD (FNEGD x) y)
+       // cond:
+       // result: (FMULD x y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.Op != OpARM64FNEGD {
                        break
                }
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64FMULD)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBUloadidx ptr idx mem)
+       // match: (FNMULD y (FNEGD x))
+       // cond:
+       // result: (FMULD x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FNEGD {
                        break
                }
-               v.reset(OpARM64MOVBUloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               x := v_1.Args[0]
+               v.reset(OpARM64FMULD)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64FNMULS_0(v *Value) bool {
+       // match: (FNMULS (FNEGS x) y)
+       // cond:
+       // result: (FMULS x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.Op != OpARM64FNEGS {
                        break
                }
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64FMULS)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (FNMULS y (FNEGS x))
+       // cond:
+       // result: (FMULS x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
+               y := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBstorezero {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if v_1.Op != OpARM64FNEGS {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v_1.Args[0]
+               v.reset(OpARM64FMULS)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool {
-       // match: (MOVBUloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64FSUBD_0(v *Value) bool {
+       // match: (FSUBD a (FMULD x y))
        // cond:
-       // result: (MOVBUload [c] ptr mem)
+       // result: (FMSUBD a x y)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               a := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64FMULD {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMSUBD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUloadidx (MOVDconst [c]) ptr mem)
+       // match: (FSUBD (FMULD x y) a)
        // cond:
-       // result: (MOVBUload [c] ptr mem)
+       // result: (FNMSUBD a x y)
        for {
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FMULD {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBUload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FNMSUBD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (FSUBD a (FNMULD x y))
+       // cond:
+       // result: (FMADDD a x y)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVBstorezeroidx {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               _ = v.Args[1]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FNMULD {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMADDD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool {
-       // match: (MOVBUreg x:(MOVBUload _ _))
+       // match: (FSUBD (FNMULD x y) a)
        // cond:
-       // result: (MOVDreg x)
+       // result: (FNMADDD a x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FNMULD {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FNMADDD)
+               v.AddArg(a)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUreg x:(MOVBUloadidx _ _ _))
+       return false
+}
+func rewriteValueARM64_OpARM64FSUBS_0(v *Value) bool {
+       // match: (FSUBS a (FMULS x y))
        // cond:
-       // result: (MOVDreg x)
+       // result: (FMSUBS a x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               _ = v.Args[1]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FMULS {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMSUBS)
+               v.AddArg(a)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUreg x:(MOVBUreg _))
+       // match: (FSUBS (FMULS x y) a)
        // cond:
-       // result: (MOVDreg x)
+       // result: (FNMSUBS a x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FMULS {
                        break
                }
-               v.reset(OpARM64MOVDreg)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FNMSUBS)
+               v.AddArg(a)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUreg (ANDconst [c] x))
+       // match: (FSUBS a (FNMULS x y))
        // cond:
-       // result: (ANDconst [c&(1<<8-1)] x)
+       // result: (FMADDS a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FNMULS {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               y := v_1.Args[1]
+               v.reset(OpARM64FMADDS)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (FSUBS (FNMULS x y) a)
+       // cond:
+       // result: (FNMADDS a x y)
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               if v_0.Op != OpARM64FNMULS {
                        break
                }
-               c := v_0.AuxInt
+               _ = v_0.Args[1]
                x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<8 - 1)
+               y := v_0.Args[1]
+               a := v.Args[1]
+               v.reset(OpARM64FNMADDS)
+               v.AddArg(a)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBUreg (MOVDconst [c]))
+       return false
+}
+func rewriteValueARM64_OpARM64GreaterEqual_0(v *Value) bool {
+       // match: (GreaterEqual (FlagEQ))
        // cond:
-       // result: (MOVDconst [int64(uint8(c))])
+       // result: (MOVDconst [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint8(c))
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBUreg x)
-       // cond: x.Type.IsBoolean()
-       // result: (MOVDreg x)
+       // match: (GreaterEqual (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               x := v.Args[0]
-               if !(x.Type.IsBoolean()) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<8-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
+       // match: (GreaterEqual (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<8-1, sc)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (GreaterEqual (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<8-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 8)] x)
+       // match: (GreaterEqual (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<8-1, 0)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (GreaterEqual (InvertFlags x))
+       // cond:
+       // result: (LessEqual x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 8)
+               x := v_0.Args[0]
+               v.reset(OpARM64LessEqual)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
+func rewriteValueARM64_OpARM64GreaterEqualU_0(v *Value) bool {
+       // match: (GreaterEqualU (FlagEQ))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBloadidx ptr idx mem)
+       // match: (GreaterEqualU (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               v.reset(OpARM64MOVBloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (GreaterEqualU (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // match: (GreaterEqualU (FlagGT_ULT))
+       // cond:
        // result: (MOVDconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBstorezero {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool {
-       // match: (MOVBloadidx ptr (MOVDconst [c]) mem)
+       // match: (GreaterEqualU (FlagGT_UGT))
        // cond:
-       // result: (MOVBload [c] ptr mem)
+       // result: (MOVDconst [1])
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBloadidx (MOVDconst [c]) ptr mem)
+       // match: (GreaterEqualU (InvertFlags x))
        // cond:
-       // result: (MOVBload [c] ptr mem)
+       // result: (LessEqualU x)
        for {
-               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpARM64LessEqualU)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       return false
+}
+func rewriteValueARM64_OpARM64GreaterThan_0(v *Value) bool {
+       // match: (GreaterThan (FlagEQ))
+       // cond:
        // result: (MOVDconst [0])
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVBstorezeroidx {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool {
-       // match: (MOVBreg x:(MOVBload _ _))
+       // match: (GreaterThan (FlagLT_ULT))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDconst [0])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBreg x:(MOVBloadidx _ _ _))
+       // match: (GreaterThan (FlagLT_UGT))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDconst [0])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBloadidx {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBreg x:(MOVBreg _))
+       // match: (GreaterThan (FlagGT_ULT))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDconst [1])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBreg (MOVDconst [c]))
+       // match: (GreaterThan (FlagGT_UGT))
        // cond:
-       // result: (MOVDconst [int64(int8(c))])
+       // result: (MOVDconst [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               c := v_0.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int8(c))
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBreg (SLLconst [lc] x))
-       // cond: lc < 8
-       // result: (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x)
+       // match: (GreaterThan (InvertFlags x))
+       // cond:
+       // result: (LessThan x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               lc := v_0.AuxInt
                x := v_0.Args[0]
-               if !(lc < 8) {
-                       break
-               }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 8-lc)
+               v.reset(OpARM64LessThan)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+func rewriteValueARM64_OpARM64GreaterThanU_0(v *Value) bool {
+       // match: (GreaterThanU (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBstoreidx ptr idx val mem)
+       // match: (GreaterThanU (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (GreaterThanU (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // match: (GreaterThanU (FlagGT_ULT))
        // cond:
-       // result: (MOVBstorezero [off] {sym} ptr mem)
+       // result: (MOVDconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               if v_1.AuxInt != 0 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
+       // match: (GreaterThanU (FlagGT_UGT))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (MOVDconst [1])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem)
+       // match: (GreaterThanU (InvertFlags x))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (LessThanU x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVBUreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               x := v_0.Args[0]
+               v.reset(OpARM64LessThanU)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       return false
+}
+func rewriteValueARM64_OpARM64LessEqual_0(v *Value) bool {
+       // match: (LessEqual (FlagEQ))
+       // cond:
+       // result: (MOVDconst [1])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem)
+       // match: (LessEqual (FlagLT_ULT))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (MOVDconst [1])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHUreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem)
+       // match: (LessEqual (FlagLT_UGT))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (MOVDconst [1])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem)
+       // match: (LessEqual (FlagGT_ULT))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (MOVDconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (LessEqual (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               if v_1.AuxInt != 8 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (LessEqual (InvertFlags x))
+       // cond:
+       // result: (GreaterEqual x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               x := v_0.Args[0]
+               v.reset(OpARM64GreaterEqual)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64LessEqualU_0(v *Value) bool {
+       // match: (LessEqualU (FlagEQ))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               if x.AuxInt != i-1 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (LessEqualU (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               if x.Aux != s {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (LessEqualU (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (LessEqualU (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (LessEqualU (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (LessEqualU (InvertFlags x))
+       // cond:
+       // result: (GreaterEqualU x)
        for {
-               if v.AuxInt != 1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
+               x := v_0.Args[0]
+               v.reset(OpARM64GreaterEqualU)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64LessThan_0(v *Value) bool {
+       // match: (LessThan (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (LessThan (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               if v_1.AuxInt != 8 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (LessThan (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (LessThan (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (LessThan (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(8, 8) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (LessThan (InvertFlags x))
+       // cond:
+       // result: (GreaterThan x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               if x.AuxInt != i-1 {
+               x := v_0.Args[0]
+               v.reset(OpARM64GreaterThan)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64LessThanU_0(v *Value) bool {
+       // match: (LessThanU (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               if x.Aux != s {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (LessThanU (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (LessThanU (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (LessThanU (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (LessThanU (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               if v.AuxInt != 1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (LessThanU (InvertFlags x))
+       // cond:
+       // result: (GreaterThanU x)
+       for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpARM64GreaterThanU)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MNEG_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MNEG x (MOVDconst [-1]))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(8, 8) {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               if v_1.AuxInt != -1 {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MNEG (MOVDconst [-1]) x)
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               if v_0.AuxInt != -1 {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
-               v.AddArg(mem)
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (MNEG _ (MOVDconst [0]))
+       // cond:
+       // result: (MOVDconst [0])
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
+               _ = v.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(8, 24) {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               if v_1.AuxInt != 0 {
                        break
                }
-               if x.AuxInt != i-1 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MNEG (MOVDconst [0]) _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x.Aux != s {
+               if v_0.AuxInt != 0 {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MNEG x (MOVDconst [1]))
+       // cond:
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (MNEG (MOVDconst [1]) x)
+       // cond:
+       // result: (NEG x)
        for {
-               if v.AuxInt != 1 {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.AuxInt != 1 {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(8, 24) {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 8 {
-                       break
-               }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && c >= 3) {
                        break
                }
-               if x.AuxInt != i-1 {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x.Aux != s {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 8 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               if x.AuxInt != i-1 {
-                       break
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
                }
-               if x.Aux != s {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       // match: (MNEG x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
        for {
-               if v.AuxInt != 1 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEG (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MNEG_20(v *Value) bool {
+       // match: (MNEG (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [-c*d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if w != w0.Args[0] {
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -c * d
+               return true
+       }
+       // match: (MNEG (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c*d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -c * d
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool {
+func rewriteValueARM64_OpARM64MNEGW_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: x
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               bfc := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x.AuxInt != i-1 {
+               c := v_1.AuxInt
+               if !(int32(c) == -1) {
                        break
                }
-               if x.Aux != s {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: x
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64UBFX {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == -1) {
                        break
                }
-               bfc2 := w0.AuxInt
-               if w != w0.Args[0] {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MNEGW _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && getARM64BFwidth(bfc) == 32-getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32-getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc)-8 && clobber(x)) {
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       // match: (MNEGW (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
+               c := v_0.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               bfc := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64UBFX {
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
                        break
                }
-               bfc2 := w0.AuxInt
-               if w != w0.Args[0] {
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32-getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32-getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc)-8 && clobber(x)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == 1) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               j := v_1.AuxInt
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c)] x))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x.AuxInt != i-1 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               if x.Aux != s {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               if w0.AuxInt != j-8 {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               w0_0 := w0.Args[0]
-               if w0_0.Op != OpARM64MOVDreg {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               if w != w0_0.Args[0] {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = log2(c + 1)
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               w0_0 := w0.Args[0]
-               if w0_0.Op != OpARM64MOVDreg {
-                       break
-               }
-               if w != w0_0.Args[0] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
-       // result: (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
-                       break
-               }
-               if x0.AuxInt != i-1 {
-                       break
-               }
-               if x0.Aux != s {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst {
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               if w != x0_1.Args[0] {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               if x1.AuxInt != i-2 {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 5)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 2
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x1.Aux != s {
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               v.reset(OpARM64SLLconst)
+               v.Type = x.Type
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if w != x1_1.Args[0] {
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MNEGW (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x2.AuxInt != i-3 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               if x2.Aux != s {
+               v.reset(OpARM64NEG)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = log2(c / 9)
+               v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v1.AuxInt = 3
+               v1.AddArg(x)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MNEGW_20(v *Value) bool {
+       // match: (MNEGW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst {
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -int64(int32(c) * int32(d))
+               return true
+       }
+       // match: (MNEGW (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-int64(int32(c)*int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x2_1.AuxInt != 24 {
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if w != x2_1.Args[0] {
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -int64(int32(c) * int32(d))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOD_0(v *Value) bool {
+       // match: (MOD (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c%d])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x3 := x2.Args[2]
-               if x3.Op != OpARM64MOVBstore {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x3.AuxInt != i-4 {
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c % d
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MODW_0(v *Value) bool {
+       // match: (MODW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)%int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) % int32(d))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBUload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBUload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBUloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVBUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBUloadidx_0(v *Value) bool {
+       // match: (MOVBUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVBUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVBUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+// rewriteValueARM64_OpARM64MOVBUreg_0 applies the MOVBUreg rewrite rules to v
+// and reports whether one of them fired. Rules are tried in this order:
+//   - operand is a MOVBUload, MOVBUloadidx or MOVBUreg  -> (MOVDreg x)
+//   - operand is (ANDconst [c] x)                       -> (ANDconst [c&(1<<8-1)] x)
+//   - operand is (MOVDconst [c])                        -> (MOVDconst [int64(uint8(c))])
+//   - operand has boolean type                          -> (MOVDreg x)
+//   - operand is (SLLconst [sc] x) passing isARM64BFMask -> UBFIZ
+//   - operand is (SRLconst [sc] x) passing isARM64BFMask -> UBFX
+func rewriteValueARM64_OpARM64MOVBUreg_0(v *Value) bool {
+       const byteMask = 1<<8 - 1
+       arg := v.Args[0]
+
+       // asMOVDreg rewrites v into (MOVDreg arg).
+       asMOVDreg := func() bool {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(arg)
+               return true
+       }
+
+       switch arg.Op {
+       case OpARM64MOVBUload:
+               _ = arg.Args[1]
+               return asMOVDreg()
+       case OpARM64MOVBUloadidx:
+               _ = arg.Args[2]
+               return asMOVDreg()
+       case OpARM64MOVBUreg:
+               return asMOVDreg()
+       case OpARM64ANDconst:
+               // Narrow the existing mask to the low 8 bits.
+               mask, inner := arg.AuxInt, arg.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = mask & byteMask
+               v.AddArg(inner)
+               return true
+       case OpARM64MOVDconst:
+               // Fold the conversion into the constant.
+               folded := int64(uint8(arg.AuxInt))
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = folded
+               return true
+       }
+
+       // The boolean-type rule is tried before the shift rules below.
+       if arg.Type.IsBoolean() {
+               return asMOVDreg()
+       }
+
+       switch arg.Op {
+       case OpARM64SLLconst:
+               shift, inner := arg.AuxInt, arg.Args[0]
+               if isARM64BFMask(shift, byteMask, shift) {
+                       v.reset(OpARM64UBFIZ)
+                       v.AuxInt = arm64BFAuxInt(shift, arm64BFWidth(byteMask, shift))
+                       v.AddArg(inner)
+                       return true
+               }
+       case OpARM64SRLconst:
+               shift, inner := arg.AuxInt, arg.Args[0]
+               if isARM64BFMask(shift, byteMask, 0) {
+                       v.reset(OpARM64UBFX)
+                       v.AuxInt = arm64BFAuxInt(shift, 8)
+                       v.AddArg(inner)
+                       return true
+               }
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBload_0(v *Value) bool { // Tries the MOVBload rules below in order; the first match rewrites v in place and returns true. Returns false if none match.
+       b := v.Block
+       _ = b // blank assignments keep b/config "used" regardless of which rules reference them
+       config := b.Func.Config
+       _ = config
+       // match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       for { // one-shot block: break = rule not matched; folds a constant add on the address into the load offset
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1] // value discarded; only indexes v's last argument up front
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { // combined offset must pass is32Bit; an OpSB base is rejected when Flag_shared is set
+                       break
+               }
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBloadidx ptr idx mem)
+       for { // address is a two-register ADD: switch to the indexed load op
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) { // the result carries no offset or symbol, so both must be empty here
+                       break
+               }
+               v.reset(OpARM64MOVBloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for { // address is a MOVDaddr: fold its offset and symbol into the load
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { // canMergeSym is defined elsewhere
+                       break
+               }
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for { // memory argument is a MOVBstorezero to the same location: the load becomes the constant 0
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { // same symbol, same offset, and isSamePtr (defined elsewhere) accepts the two pointers
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false // no rule matched; v is unchanged
+}
+func rewriteValueARM64_OpARM64MOVBloadidx_0(v *Value) bool { // Tries the MOVBloadidx rules below in order; the first match rewrites v in place and returns true. Returns false if none match.
+       // match: (MOVBloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVBload [c] ptr mem)
+       for { // one-shot block: break = rule not matched; a constant in the second slot becomes the offset of a plain MOVBload
+               _ = v.Args[2] // value discarded; only indexes v's last argument up front
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVBload [c] ptr mem)
+       for { // mirrored form: the constant is in the first slot and the other operand is used as the pointer
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for { // memory argument is a MOVBstorezeroidx to the same address: the load becomes the constant 0
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBstorezeroidx {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) { // operands may match pairwise in either order (ptr/idx swapped)
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false // no rule matched; v is unchanged
+}
+func rewriteValueARM64_OpARM64MOVBreg_0(v *Value) bool { // Tries the MOVBreg rules below in order; the first match rewrites v in place and returns true. Returns false if none match.
+       // match: (MOVBreg x:(MOVBload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for { // one-shot block: break = rule not matched; an operand that is a MOVBload makes v a plain MOVDreg of it
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBload {
+                       break
+               }
+               _ = x.Args[1] // value discarded; only indexes x's last argument up front
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for { // same shape as above, for the indexed load op
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
+                       break
+               }
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for { // operand is itself a MOVBreg: v becomes a plain MOVDreg of it
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int8(c))])
+       for { // constant operand: compute the result at compile time
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int8(c)) // low byte of c, sign-extended back to int64
+               return true
+       }
+       // match: (MOVBreg (SLLconst [lc] x))
+       // cond: lc < 8
+       // result: (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x)
+       for { // left shift by constant under v: replaced by a single SBFIZ with arguments (lc, 8-lc)
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < 8) { // guarantees 8-lc below is positive
+                       break
+               }
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 8-lc) // helper defined elsewhere; presumably packs (lsb, width) into one AuxInt - confirm
+               v.AddArg(x)
+               return true
+       }
+       return false // no rule matched; v is unchanged
+}
+func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool { // Tries the MOVBstore rules below in order; the first match rewrites v in place and returns true. Returns false if none match.
+       b := v.Block
+       _ = b // blank assignments keep b/config "used" regardless of which rules reference them
+       config := b.Func.Config
+       _ = config
+       // match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       for { // one-shot block: break = rule not matched; folds a constant add on the address into the store offset
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2] // value discarded; only indexes v's last argument up front
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { // combined offset must pass is32Bit; an OpSB base is rejected when Flag_shared is set
+                       break
+               }
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBstoreidx ptr idx val mem)
+       for { // address is a two-register ADD: switch to the indexed store op
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) { // the result carries no offset or symbol, so both must be empty here
+                       break
+               }
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for { // address is a MOVDaddr: fold its offset and symbol into the store
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) { // canMergeSym is defined elsewhere
+                       break
+               }
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVBstorezero [off] {sym} ptr mem)
+       for { // storing the constant 0: use the dedicated MOVBstorezero op, which takes no value argument
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 { // only a constant zero qualifies
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for { // this and the five rules after it strip an extension op from the stored value and store its operand x directly (presumably safe because only the low byte is written - confirm)
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVBUreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for { // same as the MOVBreg rule, for MOVBUreg
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVHreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for { // same as the MOVBreg rule, for MOVHreg
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for { // same as the MOVBreg rule, for MOVHUreg
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for { // same as the MOVBreg rule, for MOVWreg
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for { // same as the MOVBreg rule, for MOVWUreg
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false // no rule matched; v is unchanged
+}
+func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool { // Tries the MOVBstore rules below in order. Each pairs v with a byte store x found in v's memory slot (Args[2]) and, on a match, rewrites v into one MOVHstore/MOVHstoreidx and returns true. Returns false if none match.
+       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       for { // one-shot block: break = rule not matched. v stores w>>8 at offset i, x stores w at i-1 -> one MOVHstore of w at i-1
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2] // value discarded; only indexes v's last argument up front
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 { // the shift amount must be exactly 8
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 { // x must write the offset immediately below v's
+                       break
+               }
+               if x.Aux != s { // and use the same symbol
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] { // x must store the very value that v's shift operates on
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) { // && short-circuits, so clobber(x) (defined elsewhere) runs only after every other check has passed
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem) // the new store takes x's memory argument, not x itself
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       for { // indexed variant: v stores w>>8 at (ptr0+idx0)+1, x is a MOVBstoreidx of w -> one MOVHstoreidx of w
+               if v.AuxInt != 1 { // this form only applies when v's offset is exactly 1
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) { // ptr/idx may match pairwise in either order
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       for { // like the first rule, but the stored value is a UBFX whose AuxInt equals arm64BFAuxInt(8, 8)
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               if v_1.AuxInt != arm64BFAuxInt(8, 8) { // helper defined elsewhere; presumably encodes (lsb=8, width=8) - confirm
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       for { // indexed variant of the UBFX(8, 8) rule
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               if v_1.AuxInt != arm64BFAuxInt(8, 8) {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       for { // like the first rule, but the stored value is a UBFX whose AuxInt equals arm64BFAuxInt(8, 24)
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               if v_1.AuxInt != arm64BFAuxInt(8, 24) {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       for { // indexed variant of the UBFX(8, 24) rule
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               if v_1.AuxInt != arm64BFAuxInt(8, 24) {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w mem)
+       for { // like the first rule, but the shift operand is wrapped in a MOVDreg that is looked through
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0] // w is the MOVDreg's operand, compared against x's stored value below
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w mem)
+       for { // indexed variant of the SRLconst-over-MOVDreg rule
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       for { // general shift form: v stores w>>j at i, x stores w0 = w>>(j-8) at i-1 -> one MOVHstore of w0 at i-1
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 { // x's shift amount must be exactly 8 less than v's
+                       break
+               }
+               if w != w0.Args[0] { // and both shifts must operate on the same value
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0) // the merged store writes x's (less-shifted) value, not v's
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       for { // indexed variant of the general shift form
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false // no rule matched; v is unchanged
+}
+func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               bfc := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64UBFX {
+                       break
+               }
+               bfc2 := w0.AuxInt
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && getARM64BFwidth(bfc) == 32-getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32-getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc)-8 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
+                       break
+               }
+               bfc := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64UBFX {
+                       break
+               }
+               bfc2 := w0.AuxInt
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && getARM64BFwidth(bfc) == 32-getARM64BFlsb(bfc) && getARM64BFwidth(bfc2) == 32-getARM64BFlsb(bfc2) && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc)-8 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr0 w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               w0_0 := w0.Args[0]
+               if w0_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != w0_0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr1 idx1 w0 mem)
+       for {
+               if v.AuxInt != 1 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               j := v_1.AuxInt
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               w0_0 := w0.Args[0]
+               if w0_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != w0_0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x1_1.AuxInt != 16 {
+                       break
+               }
+               if w != x1_1.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != i-3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 24 {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               x3 := x2.Args[2]
+               if x3.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
+                       break
+               }
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x3_1.AuxInt != 32 {
+                       break
+               }
+               if w != x3_1.Args[0] {
+                       break
+               }
+               x4 := x3.Args[2]
+               if x4.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x4.AuxInt != i-5 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
+                       break
+               }
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x4_1.AuxInt != 40 {
+                       break
+               }
+               if w != x4_1.Args[0] {
+                       break
+               }
+               x5 := x4.Args[2]
+               if x5.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x5.AuxInt != i-6 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
+                       break
+               }
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x5_1.AuxInt != 48 {
+                       break
+               }
+               if w != x5_1.Args[0] {
+                       break
+               }
+               x6 := x5.Args[2]
+               if x6.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x6.AuxInt != i-7 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
+                       break
+               }
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x6_1.AuxInt != 56 {
+                       break
+               }
+               if w != x6_1.Args[0] {
+                       break
+               }
+               mem := x6.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [7] {s} p w x0:(MOVBstore [6] {s} p (SRLconst [8] w) x1:(MOVBstore [5] {s} p (SRLconst [16] w) x2:(MOVBstore [4] {s} p (SRLconst [24] w) x3:(MOVBstore [3] {s} p (SRLconst [32] w) x4:(MOVBstore [2] {s} p (SRLconst [40] w) x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w) x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
+       for {
+               if v.AuxInt != 7 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != 6 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != 5 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               if p != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x1_1.AuxInt != 16 {
+                       break
+               }
+               if w != x1_1.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != 4 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if p != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 24 {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               x3 := x2.Args[2]
+               if x3.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x3.AuxInt != 3 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[2]
+               if p != x3.Args[0] {
+                       break
+               }
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x3_1.AuxInt != 32 {
+                       break
+               }
+               if w != x3_1.Args[0] {
+                       break
+               }
+               x4 := x3.Args[2]
+               if x4.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x4.AuxInt != 2 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[2]
+               if p != x4.Args[0] {
+                       break
+               }
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x4_1.AuxInt != 40 {
+                       break
+               }
+               if w != x4_1.Args[0] {
+                       break
+               }
+               x5 := x4.Args[2]
+               if x5.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x5.AuxInt != 1 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[2]
+               p1 := x5.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x5_1.AuxInt != 48 {
+                       break
+               }
+               if w != x5_1.Args[0] {
+                       break
+               }
+               x6 := x5.Args[2]
+               if x6.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x6.Args[3]
+               ptr0 := x6.Args[0]
+               idx0 := x6.Args[1]
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpARM64SRLconst {
+                       break
+               }
+               if x6_2.AuxInt != 56 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               mem := x6.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64UBFX {
+                       break
+               }
+               if x0_1.AuxInt != arm64BFAuxInt(8, 24) {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64UBFX {
+                       break
+               }
+               if x1_1.AuxInt != arm64BFAuxInt(16, 16) {
+                       break
+               }
+               if w != x1_1.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != i-3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64UBFX {
+                       break
+               }
+               if x2_1.AuxInt != arm64BFAuxInt(24, 8) {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               mem := x2.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
+       for {
+               if v.AuxInt != 3 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != 2 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64UBFX {
+                       break
+               }
+               if x0_1.AuxInt != arm64BFAuxInt(8, 24) {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != 1 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64UBFX {
+                       break
+               }
+               if x1_1.AuxInt != arm64BFAuxInt(16, 16) {
+                       break
+               }
+               if w != x1_1.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x2.Args[3]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64UBFX {
+                       break
+               }
+               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               x0_1_0 := x0_1.Args[0]
+               if x0_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x0_1_0.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               if x3.Aux != s {
+               if x1.AuxInt != i-2 {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               if x1.Aux != s {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64SRLconst {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               if x3_1.AuxInt != 32 {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
                        break
                }
-               if w != x3_1.Args[0] {
+               if x1_1.AuxInt != 16 {
                        break
                }
-               x4 := x3.Args[2]
-               if x4.Op != OpARM64MOVBstore {
+               x1_1_0 := x1_1.Args[0]
+               if x1_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if x4.AuxInt != i-5 {
+               if w != x1_1_0.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] {
+               if x2.AuxInt != i-3 {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64SRLconst {
+               if x2.Aux != s {
                        break
                }
-               if x4_1.AuxInt != 40 {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if w != x4_1.Args[0] {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
                        break
                }
-               x5 := x4.Args[2]
-               if x5.Op != OpARM64MOVBstore {
+               if x2_1.AuxInt != 24 {
                        break
                }
-               if x5.AuxInt != i-6 {
+               x2_1_0 := x2_1.Args[0]
+               if x2_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if x5.Aux != s {
+               if w != x2_1_0.Args[0] {
                        break
                }
-               _ = x5.Args[2]
-               if ptr != x5.Args[0] {
+               mem := x2.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpARM64SRLconst {
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w)) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
+       for {
+               if v.AuxInt != 3 {
                        break
                }
-               if x5_1.AuxInt != 48 {
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
                        break
                }
-               if w != x5_1.Args[0] {
+               if x0.AuxInt != 2 {
                        break
                }
-               x6 := x5.Args[2]
-               if x6.Op != OpARM64MOVBstore {
+               if x0.Aux != s {
                        break
                }
-               if x6.AuxInt != i-7 {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               if x6.Aux != s {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x6.Args[2]
-               if ptr != x6.Args[0] {
+               if x0_1.AuxInt != 8 {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpARM64SRLconst {
+               x0_1_0 := x0_1.Args[0]
+               if x0_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if x6_1.AuxInt != 56 {
+               if w != x0_1_0.Args[0] {
                        break
                }
-               if w != x6_1.Args[0] {
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               mem := x6.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+               if x1.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 7
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type)
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x1_1.AuxInt != 16 {
+                       break
+               }
+               x1_1_0 := x1_1.Args[0]
+               if x1_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x1_1_0.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x2.Args[3]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_2.AuxInt != 24 {
+                       break
+               }
+               x2_2_0 := x2_2.Args[0]
+               if x2_2_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x2_2_0.Args[0] {
+                       break
+               }
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [7] {s} p w x0:(MOVBstore [6] {s} p (SRLconst [8] w) x1:(MOVBstore [5] {s} p (SRLconst [16] w) x2:(MOVBstore [4] {s} p (SRLconst [24] w) x3:(MOVBstore [3] {s} p (SRLconst [32] w) x4:(MOVBstore [2] {s} p (SRLconst [40] w) x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w) x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
-       // result: (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstore_30(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
        for {
-               if v.AuxInt != 7 {
-                       break
-               }
+               i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
+               ptr := v.Args[0]
                w := v.Args[1]
                x0 := v.Args[2]
                if x0.Op != OpARM64MOVBstore {
                        break
                }
-               if x0.AuxInt != 6 {
+               if x0.AuxInt != i-1 {
                        break
                }
                if x0.Aux != s {
                        break
                }
                _ = x0.Args[2]
-               if p != x0.Args[0] {
+               if ptr != x0.Args[0] {
                        break
                }
                x0_1 := x0.Args[1]
@@ -8174,14 +9697,14 @@ func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool {
                if x1.Op != OpARM64MOVBstore {
                        break
                }
-               if x1.AuxInt != 5 {
+               if x1.AuxInt != i-2 {
                        break
                }
                if x1.Aux != s {
                        break
                }
                _ = x1.Args[2]
-               if p != x1.Args[0] {
+               if ptr != x1.Args[0] {
                        break
                }
                x1_1 := x1.Args[1]
@@ -8198,14 +9721,14 @@ func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool {
                if x2.Op != OpARM64MOVBstore {
                        break
                }
-               if x2.AuxInt != 4 {
+               if x2.AuxInt != i-3 {
                        break
                }
                if x2.Aux != s {
                        break
                }
                _ = x2.Args[2]
-               if p != x2.Args[0] {
+               if ptr != x2.Args[0] {
                        break
                }
                x2_1 := x2.Args[1]
@@ -8218,870 +9741,1123 @@ func rewriteValueARM64_OpARM64MOVBstore_20(v *Value) bool {
                if w != x2_1.Args[0] {
                        break
                }
-               x3 := x2.Args[2]
-               if x3.Op != OpARM64MOVBstore {
-                       break
-               }
-               if x3.AuxInt != 3 {
-                       break
-               }
-               if x3.Aux != s {
-                       break
-               }
-               _ = x3.Args[2]
-               if p != x3.Args[0] {
-                       break
-               }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if x3_1.AuxInt != 32 {
+               mem := x2.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               if w != x3_1.Args[0] {
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
+       for {
+               if v.AuxInt != 3 {
                        break
                }
-               x4 := x3.Args[2]
-               if x4.Op != OpARM64MOVBstore {
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
                        break
                }
-               if x4.AuxInt != 2 {
+               if x0.AuxInt != 2 {
                        break
                }
-               if x4.Aux != s {
+               if x0.Aux != s {
                        break
                }
-               _ = x4.Args[2]
-               if p != x4.Args[0] {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64SRLconst {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
                        break
                }
-               if x4_1.AuxInt != 40 {
+               if x0_1.AuxInt != 8 {
                        break
                }
-               if w != x4_1.Args[0] {
+               if w != x0_1.Args[0] {
                        break
                }
-               x5 := x4.Args[2]
-               if x5.Op != OpARM64MOVBstore {
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
                        break
                }
-               if x5.AuxInt != 1 {
+               if x1.AuxInt != 1 {
                        break
                }
-               if x5.Aux != s {
+               if x1.Aux != s {
                        break
                }
-               _ = x5.Args[2]
-               p1 := x5.Args[0]
+               _ = x1.Args[2]
+               p1 := x1.Args[0]
                if p1.Op != OpARM64ADD {
                        break
                }
                _ = p1.Args[1]
                ptr1 := p1.Args[0]
                idx1 := p1.Args[1]
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpARM64SRLconst {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
                        break
                }
-               if x5_1.AuxInt != 48 {
+               if x1_1.AuxInt != 16 {
                        break
                }
-               if w != x5_1.Args[0] {
+               if w != x1_1.Args[0] {
                        break
                }
-               x6 := x5.Args[2]
-               if x6.Op != OpARM64MOVBstoreidx {
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = x6.Args[3]
-               ptr0 := x6.Args[0]
-               idx0 := x6.Args[1]
-               x6_2 := x6.Args[2]
-               if x6_2.Op != OpARM64SRLconst {
+               _ = x2.Args[3]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64SRLconst {
                        break
                }
-               if x6_2.AuxInt != 56 {
+               if x2_2.AuxInt != 24 {
                        break
                }
-               if w != x6_2.Args[0] {
+               if w != x2_2.Args[0] {
                        break
                }
-               mem := x6.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr0)
                v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if x0.AuxInt != i-1 {
+               if x.AuxInt != i-1 {
                        break
                }
-               if x0.Aux != s {
+               if x.Aux != s {
                        break
                }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64UBFX {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
                        break
                }
-               if x0_1.AuxInt != arm64BFAuxInt(8, 24) {
+               if x_1.AuxInt != 8 {
                        break
                }
-               if w != x0_1.Args[0] {
+               if w != x_1.Args[0] {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if x1.AuxInt != i-2 {
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               if x1.Aux != s {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64UBFX {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64SRLconst {
                        break
                }
-               if x1_1.AuxInt != arm64BFAuxInt(16, 16) {
+               if x_2.AuxInt != 8 {
                        break
                }
-               if w != x1_1.Args[0] {
+               if w != x_2.Args[0] {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               if x2.AuxInt != i-3 {
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if x2.Aux != s {
+               if x.AuxInt != i-1 {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               if x.Aux != s {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64UBFX {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               if x2_1.AuxInt != arm64BFAuxInt(24, 8) {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64UBFX {
                        break
                }
-               if w != x2_1.Args[0] {
+               if x_1.AuxInt != arm64BFAuxInt(8, 8) {
                        break
                }
-               mem := x2.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               if w != x_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 3
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
        for {
-               if v.AuxInt != 3 {
+               if v.AuxInt != 1 {
                        break
                }
                s := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
                w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               if x0.AuxInt != 2 {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64UBFX {
                        break
                }
-               if x0.Aux != s {
+               if x_2.AuxInt != arm64BFAuxInt(8, 8) {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               if w != x_2.Args[0] {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64UBFX {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               if x0_1.AuxInt != arm64BFAuxInt(8, 24) {
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if w != x0_1.Args[0] {
+               if x.AuxInt != i-1 {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               if x.Aux != s {
                        break
                }
-               if x1.AuxInt != 1 {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               if x_1.AuxInt != 8 {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64UBFX {
+               x_1_0 := x_1.Args[0]
+               if x_1_0.Op != OpARM64MOVDreg {
+                       break
+               }
+               if w != x_1_0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               if x1_1.AuxInt != arm64BFAuxInt(16, 16) {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if w != x1_1.Args[0] {
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstoreidx {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64SRLconst {
                        break
                }
-               _ = x2.Args[3]
-               ptr0 := x2.Args[0]
-               idx0 := x2.Args[1]
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpARM64UBFX {
+               if x_2.AuxInt != 8 {
                        break
                }
-               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
+               x_2_0 := x_2.Args[0]
+               if x_2_0.Op != OpARM64MOVDreg {
                        break
                }
-               if w != x2_2.Args[0] {
+               if w != x_2_0.Args[0] {
                        break
                }
-               mem := x2.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg(ptr0)
                v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
-                       break
-               }
-               if x0.AuxInt != i-1 {
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if x0.Aux != s {
+               if x.AuxInt != i-1 {
                        break
                }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               if x.Aux != s {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64UBFX {
                        break
                }
-               x0_1_0 := x0_1.Args[0]
-               if x0_1_0.Op != OpARM64MOVDreg {
+               if x_1.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               if w != x0_1_0.Args[0] {
+               if w != x_1.Args[0] {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if x1.AuxInt != i-2 {
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               if x1.Aux != s {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64UBFX {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               if x_2.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               x1_1_0 := x1_1.Args[0]
-               if x1_1_0.Op != OpARM64MOVDreg {
+               if w != x_2.Args[0] {
                        break
                }
-               if w != x1_1_0.Args[0] {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore {
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstore_40(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
                        break
                }
-               if x2.AuxInt != i-3 {
+               if x.AuxInt != i-1 {
                        break
                }
-               if x2.Aux != s {
+               if x.Aux != s {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
                        break
                }
-               if x2_1.AuxInt != 24 {
+               if x_1.AuxInt != 8 {
                        break
                }
-               x2_1_0 := x2_1.Args[0]
-               if x2_1_0.Op != OpARM64MOVDreg {
+               x_1_0 := x_1.Args[0]
+               if x_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if w != x2_1_0.Args[0] {
+               if w != x_1_0.Args[0] {
                        break
                }
-               mem := x2.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 3
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w)) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w)) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
+       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
        for {
-               if v.AuxInt != 3 {
+               if v.AuxInt != 1 {
                        break
                }
                s := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr1 := v_0.Args[0]
+               idx1 := v_0.Args[1]
                w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               if x0.AuxInt != 2 {
+               _ = x.Args[3]
+               ptr0 := x.Args[0]
+               idx0 := x.Args[1]
+               x_2 := x.Args[2]
+               if x_2.Op != OpARM64SRLconst {
                        break
                }
-               if x0.Aux != s {
+               if x_2.AuxInt != 8 {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               x_2_0 := x_2.Args[0]
+               if x_2_0.Op != OpARM64MOVDreg {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst {
+               if w != x_2_0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr0)
+               v.AddArg(idx0)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstoreidx_0(v *Value) bool {
+       // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVBstore [c] ptr val mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx (MOVDconst [c]) idx val mem)
+       // cond:
+       // result: (MOVBstore [c] idx val mem)
+       for {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVBstorezeroidx ptr idx mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               x0_1_0 := x0_1.Args[0]
-               if x0_1_0.Op != OpARM64MOVDreg {
+               if v_2.AuxInt != 0 {
                        break
                }
-               if w != x0_1_0.Args[0] {
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVBreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBreg {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVBUreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVBUreg {
                        break
                }
-               if x1.AuxInt != 1 {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVHreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               if x1.Aux != s {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHUreg {
                        break
                }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVWreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVBstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVBstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstoreidx ptr idx w mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               x1_1_0 := x1_1.Args[0]
-               if x1_1_0.Op != OpARM64MOVDreg {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if w != x1_1_0.Args[0] {
+               idx := v_1.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64SRLconst {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstoreidx {
+               if v_2.AuxInt != 8 {
                        break
                }
-               _ = x2.Args[3]
-               ptr0 := x2.Args[0]
-               idx0 := x2.Args[1]
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpARM64SRLconst {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               if x2_2.AuxInt != 24 {
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
                        break
                }
-               x2_2_0 := x2_2.Args[0]
-               if x2_2_0.Op != OpARM64MOVDreg {
+               if idx != x.Args[1] {
                        break
                }
-               if w != x2_2_0.Args[0] {
+               if w != x.Args[2] {
                        break
                }
-               mem := x2.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr0)
-               v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstore_30(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVBstoreidx_10(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
+       // match: (MOVBstoreidx ptr (ADDconst [3] idx) w x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
        // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       // result: (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
-               w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               if x0.AuxInt != i-1 {
+               if v_1.AuxInt != 3 {
                        break
                }
-               if x0.Aux != s {
+               idx := v_1.Args[0]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = x0.Args[2]
+               _ = x0.Args[3]
                if ptr != x0.Args[0] {
                        break
                }
                x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst {
+               if x0_1.Op != OpARM64ADDconst {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               if x0_1.AuxInt != 2 {
                        break
                }
-               if w != x0_1.Args[0] {
+               if idx != x0_1.Args[0] {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpARM64UBFX {
                        break
                }
-               if x1.AuxInt != i-2 {
+               if x0_2.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               if x1.Aux != s {
+               if w != x0_2.Args[0] {
                        break
                }
-               _ = x1.Args[2]
+               x1 := x0.Args[3]
+               if x1.Op != OpARM64MOVBstoreidx {
+                       break
+               }
+               _ = x1.Args[3]
                if ptr != x1.Args[0] {
                        break
                }
                x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst {
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               if x1_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
                        break
                }
-               if w != x1_1.Args[0] {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpARM64UBFX {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstore {
+               if x1_2.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               if x2.AuxInt != i-3 {
+               if w != x1_2.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               x2 := x1.Args[3]
+               if x2.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = x2.Args[2]
+               _ = x2.Args[3]
                if ptr != x2.Args[0] {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64SRLconst {
+               if idx != x2.Args[1] {
                        break
                }
-               if x2_1.AuxInt != 24 {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64UBFX {
                        break
                }
-               if w != x2_1.Args[0] {
+               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
                        break
                }
-               mem := x2.Args[2]
+               if w != x2_2.Args[0] {
+                       break
+               }
+               mem := x2.Args[3]
                if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 3
-               v.Aux = s
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [3] {s} p w x0:(MOVBstore [2] {s} p (SRLconst [8] w) x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w) x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
+       // match: (MOVBstoreidx ptr idx w x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstoreidx ptr idx w mem)
        for {
-               if v.AuxInt != 3 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpARM64MOVBstore {
-                       break
-               }
-               if x0.AuxInt != 2 {
-                       break
-               }
-               if x0.Aux != s {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               _ = x0.Args[3]
+               if ptr != x0.Args[0] {
                        break
                }
                x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64SRLconst {
+               if x0_1.Op != OpARM64ADDconst {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               if x0_1.AuxInt != 1 {
                        break
                }
-               if w != x0_1.Args[0] {
+               if idx != x0_1.Args[0] {
                        break
                }
-               x1 := x0.Args[2]
-               if x1.Op != OpARM64MOVBstore {
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpARM64UBFX {
                        break
                }
-               if x1.AuxInt != 1 {
+               if x0_2.AuxInt != arm64BFAuxInt(8, 24) {
                        break
                }
-               if x1.Aux != s {
+               if w != x0_2.Args[0] {
                        break
                }
-               _ = x1.Args[2]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               x1 := x0.Args[3]
+               if x1.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64SRLconst {
+               _ = x1.Args[3]
+               if ptr != x1.Args[0] {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if w != x1_1.Args[0] {
+               if x1_1.AuxInt != 2 {
                        break
                }
-               x2 := x1.Args[2]
-               if x2.Op != OpARM64MOVBstoreidx {
+               if idx != x1_1.Args[0] {
                        break
                }
-               _ = x2.Args[3]
-               ptr0 := x2.Args[0]
-               idx0 := x2.Args[1]
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpARM64SRLconst {
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpARM64UBFX {
                        break
                }
-               if x2_2.AuxInt != 24 {
+               if x1_2.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               if w != x2_2.Args[0] {
+               if w != x1_2.Args[0] {
                        break
                }
-               mem := x2.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2)) {
+               x2 := x1.Args[3]
+               if x2.Op != OpARM64MOVBstoreidx {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr0)
-               v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               _ = x2.Args[3]
+               if ptr != x2.Args[0] {
                        break
                }
-               if x.AuxInt != i-1 {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               if x.Aux != s {
+               if x2_1.AuxInt != 3 {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               if idx != x2_1.Args[0] {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpARM64UBFX {
                        break
                }
-               if x_1.AuxInt != 8 {
+               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
                        break
                }
-               if w != x_1.Args[0] {
+               if w != x2_2.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               mem := x2.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
+               v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+       // match: (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
        for {
-               if v.AuxInt != 1 {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_1.AuxInt != 1 {
                        break
                }
-               _ = v_0.Args[1]
-               ptr1 := v_0.Args[0]
-               idx1 := v_0.Args[1]
-               w := v.Args[1]
-               x := v.Args[2]
+               idx := v_1.Args[0]
+               w := v.Args[2]
+               x := v.Args[3]
                if x.Op != OpARM64MOVBstoreidx {
                        break
                }
                _ = x.Args[3]
-               ptr0 := x.Args[0]
-               idx0 := x.Args[1]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
                x_2 := x.Args[2]
-               if x_2.Op != OpARM64SRLconst {
+               if x_2.Op != OpARM64UBFX {
                        break
                }
-               if x_2.AuxInt != 8 {
+               if x_2.AuxInt != arm64BFAuxInt(8, 8) {
                        break
                }
                if w != x_2.Args[0] {
                        break
                }
                mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
                v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr0)
-               v.AddArg(idx0)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
                v0.AddArg(w)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
+       // match: (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 8)] w) mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       // result: (MOVHstoreidx ptr idx w mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
-                       break
-               }
-               if x.AuxInt != i-1 {
-                       break
-               }
-               if x.Aux != s {
+               idx := v.Args[1]
+               w := v.Args[2]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVBstoreidx {
                        break
                }
-               _ = x.Args[2]
+               _ = x.Args[3]
                if ptr != x.Args[0] {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64UBFX {
-                       break
-               }
-               if x_1.AuxInt != arm64BFAuxInt(8, 8) {
-                       break
-               }
-               if w != x_1.Args[0] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
-       for {
-               if v.AuxInt != 1 {
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64ADDconst {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if x_1.AuxInt != 1 {
                        break
                }
-               _ = v_0.Args[1]
-               ptr1 := v_0.Args[0]
-               idx1 := v_0.Args[1]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               if idx != x_1.Args[0] {
                        break
                }
-               _ = x.Args[3]
-               ptr0 := x.Args[0]
-               idx0 := x.Args[1]
                x_2 := x.Args[2]
                if x_2.Op != OpARM64UBFX {
                        break
@@ -9093,871 +10869,1163 @@ func rewriteValueARM64_OpARM64MOVBstore_30(v *Value) bool {
                        break
                }
                mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
                v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr0)
-               v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBstorezero [off1+off2] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               if x.AuxInt != i-1 {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               if x.Aux != s {
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst {
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVBstorezeroidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if x_1.AuxInt != 8 {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               x_1_0 := x_1.Args[0]
-               if x_1_0.Op != OpARM64MOVDreg {
+               v.reset(OpARM64MOVBstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVBstorezero {
                        break
                }
-               if w != x_1_0.Args[0] {
+               j := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = min(i, j)
                v.Aux = s
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
+               v.AddArg(ptr0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
+       // match: (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
        // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+       // result: (MOVHstorezeroidx ptr1 idx1 mem)
        for {
                if v.AuxInt != 1 {
                        break
                }
                s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr1 := v_0.Args[0]
-               idx1 := v_0.Args[1]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               ptr0 := x.Args[0]
-               idx0 := x.Args[1]
-               x_2 := x.Args[2]
-               if x_2.Op != OpARM64SRLconst {
-                       break
-               }
-               if x_2.AuxInt != 8 {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVBstorezeroidx {
                        break
                }
-               x_2_0 := x_2.Args[0]
-               if x_2_0.Op != OpARM64MOVDreg {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               if w != x_2_0.Args[0] {
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v *Value) bool {
+       // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVBstorezero [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVBstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr0)
-               v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = c
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
+       // match: (MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               if x.AuxInt != i-1 {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if x.Aux != s {
+               idx := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstorezeroidx {
                        break
                }
                _ = x.Args[2]
                if ptr != x.Args[0] {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64UBFX {
+               if idx != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               if x_1.AuxInt != arm64BFAuxInt(8, 24) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               if w != x_1.Args[0] {
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
+               v.reset(OpARM64MOVDloadidx)
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+       // match: (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDloadidx8 ptr idx mem)
        for {
-               if v.AuxInt != 1 {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.AuxInt != 3 {
                        break
                }
                _ = v_0.Args[1]
-               ptr1 := v_0.Args[0]
-               idx1 := v_0.Args[1]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               _ = x.Args[3]
-               ptr0 := x.Args[0]
-               idx0 := x.Args[1]
-               x_2 := x.Args[2]
-               if x_2.Op != OpARM64UBFX {
+               v.reset(OpARM64MOVDloadidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               if x_2.AuxInt != arm64BFAuxInt(8, 24) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               if w != x_2.Args[0] {
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDstorezero {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr0)
-               v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstore_40(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
+       // match: (MOVDloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVDload [c] ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstore {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x.AuxInt != i-1 {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVDload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x.Aux != s {
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx ptr (SLLconst [3] idx) mem)
+       // cond:
+       // result: (MOVDloadidx8 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               if v_1.AuxInt != 3 {
                        break
                }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64SRLconst {
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDloadidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx (SLLconst [3] idx) ptr mem)
+       // cond:
+       // result: (MOVDloadidx8 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if x_1.AuxInt != 8 {
+               if v_0.AuxInt != 3 {
                        break
                }
-               x_1_0 := x_1.Args[0]
-               if x_1_0.Op != OpARM64MOVDreg {
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDloadidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDstorezeroidx {
                        break
                }
-               if w != x_1_0.Args[0] {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDloadidx8_0(v *Value) bool {
+       // match: (MOVDloadidx8 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVDload [c<<3] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = i - 1
-               v.Aux = s
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = c << 3
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
+       // match: (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               s := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr1 := v_0.Args[0]
-               idx1 := v_0.Args[1]
-               w := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               ptr0 := x.Args[0]
-               idx0 := x.Args[1]
-               x_2 := x.Args[2]
-               if x_2.Op != OpARM64SRLconst {
-                       break
-               }
-               if x_2.AuxInt != 8 {
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDstorezeroidx8 {
                        break
                }
-               x_2_0 := x_2.Args[0]
-               if x_2_0.Op != OpARM64MOVDreg {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               if w != x_2_0.Args[0] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool {
+       // match: (MOVDreg x)
+       // cond: x.Uses == 1
+       // result: (MOVDnop x)
+       for {
+               x := v.Args[0]
+               if !(x.Uses == 1) {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               v.reset(OpARM64MOVDnop)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr0)
-               v.AddArg(idx0)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstoreidx_0(v *Value) bool {
-       // match: (MOVBstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVDstore ptr (FMOVDfpgp val) mem)
        // cond:
-       // result: (MOVBstore [c] ptr val mem)
+       // result: (FMOVDstore ptr val mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64FMOVDfpgp {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = c
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64FMOVDstore)
                v.AddArg(ptr)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx (MOVDconst [c]) idx val mem)
-       // cond:
-       // result: (MOVBstore [c] idx val mem)
+       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstore [off1+off2] {sym} ptr val mem)
        for {
-               _ = v.Args[3]
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVBstore)
-               v.AuxInt = c
-               v.AddArg(idx)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVBstorezeroidx ptr idx mem)
+       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstoreidx ptr idx val mem)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if v_2.AuxInt != 0 {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVBstorezeroidx)
+               v.reset(OpARM64MOVDstoreidx)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVBreg x) mem)
-       // cond:
-       // result: (MOVBstoreidx ptr idx x mem)
+       // match: (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstoreidx8 ptr idx val mem)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVBreg {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVBstoreidx)
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx8)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(x)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVBUreg x) mem)
+       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVBstoreidx ptr idx x mem)
+       // result: (MOVDstorezero [off] {sym} ptr mem)
        for {
-               _ = v.Args[3]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVBUreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVBstoreidx)
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVHreg x) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstoreidx_0(v *Value) bool {
+       // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVBstoreidx ptr idx x mem)
+       // result: (MOVDstore [c] ptr val mem)
        for {
                _ = v.Args[3]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
+               c := v_1.AuxInt
+               val := v.Args[2]
                mem := v.Args[3]
-               v.reset(OpARM64MOVBstoreidx)
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = c
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVHUreg x) mem)
+       // match: (MOVDstoreidx (MOVDconst [c]) idx val mem)
        // cond:
-       // result: (MOVBstoreidx ptr idx x mem)
+       // result: (MOVDstore [c] idx val mem)
        for {
                _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHUreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
                mem := v.Args[3]
-               v.reset(OpARM64MOVBstoreidx)
-               v.AddArg(ptr)
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = c
                v.AddArg(idx)
-               v.AddArg(x)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVWreg x) mem)
+       // match: (MOVDstoreidx ptr (SLLconst [3] idx) val mem)
        // cond:
-       // result: (MOVBstoreidx ptr idx x mem)
+       // result: (MOVDstoreidx8 ptr idx val mem)
        for {
                _ = v.Args[3]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               x := v_2.Args[0]
+               if v_1.AuxInt != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
                mem := v.Args[3]
-               v.reset(OpARM64MOVBstoreidx)
+               v.reset(OpARM64MOVDstoreidx8)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(x)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx (MOVWUreg x) mem)
+       // match: (MOVDstoreidx (SLLconst [3] idx) ptr val mem)
        // cond:
-       // result: (MOVBstoreidx ptr idx x mem)
+       // result: (MOVDstoreidx8 ptr idx val mem)
        for {
                _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               x := v_2.Args[0]
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
                mem := v.Args[3]
-               v.reset(OpARM64MOVBstoreidx)
+               v.reset(OpARM64MOVDstoreidx8)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(x)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstoreidx ptr idx w mem)
+       // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVDstorezeroidx ptr idx mem)
        for {
                _ = v.Args[3]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if v_1.AuxInt != 1 {
-                       break
-               }
-               idx := v_1.Args[0]
+               idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_2.AuxInt != 8 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               if ptr != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstorezeroidx)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstoreidx_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBstoreidx ptr (ADDconst [3] idx) w x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
+func rewriteValueARM64_OpARM64MOVDstoreidx8_0(v *Value) bool {
+       // match: (MOVDstoreidx8 ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVDstore [c<<3] ptr val mem)
        for {
                _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if v_1.AuxInt != 3 {
-                       break
-               }
-               idx := v_1.Args[0]
-               w := v.Args[2]
-               x0 := v.Args[3]
-               if x0.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x0.Args[3]
-               if ptr != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x0_1.AuxInt != 2 {
-                       break
-               }
-               if idx != x0_1.Args[0] {
-                       break
-               }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpARM64UBFX {
-                       break
-               }
-               if x0_2.AuxInt != arm64BFAuxInt(8, 24) {
-                       break
-               }
-               if w != x0_2.Args[0] {
-                       break
-               }
-               x1 := x0.Args[3]
-               if x1.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x1.Args[3]
-               if ptr != x1.Args[0] {
-                       break
-               }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x1_1.AuxInt != 1 {
-                       break
-               }
-               if idx != x1_1.Args[0] {
-                       break
-               }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpARM64UBFX {
-                       break
-               }
-               if x1_2.AuxInt != arm64BFAuxInt(16, 16) {
-                       break
-               }
-               if w != x1_2.Args[0] {
-                       break
-               }
-               x2 := x1.Args[3]
-               if x2.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x2.Args[3]
-               if ptr != x2.Args[0] {
-                       break
-               }
-               if idx != x2.Args[1] {
-                       break
-               }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpARM64UBFX {
-                       break
-               }
-               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
-                       break
-               }
-               if w != x2_2.Args[0] {
-                       break
-               }
-               mem := x2.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = c << 3
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx w x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 24)] w) x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(16, 16)] w) x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVWstoreidx ptr idx w mem)
-       for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x0 := v.Args[3]
-               if x0.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x0.Args[3]
-               if ptr != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x0_1.AuxInt != 1 {
-                       break
-               }
-               if idx != x0_1.Args[0] {
-                       break
-               }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpARM64UBFX {
-                       break
-               }
-               if x0_2.AuxInt != arm64BFAuxInt(8, 24) {
-                       break
-               }
-               if w != x0_2.Args[0] {
+       // match: (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVDstorezeroidx8 ptr idx mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               x1 := x0.Args[3]
-               if x1.Op != OpARM64MOVBstoreidx {
+               if v_2.AuxInt != 0 {
                        break
                }
-               _ = x1.Args[3]
-               if ptr != x1.Args[0] {
+               mem := v.Args[3]
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstorezero [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               if x1_1.AuxInt != 2 {
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               if idx != x1_1.Args[0] {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpARM64UBFX {
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstorezeroidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if x1_2.AuxInt != arm64BFAuxInt(16, 16) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               if w != x1_2.Args[0] {
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVDstorezeroidx8 ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               x2 := x1.Args[3]
-               if x2.Op != OpARM64MOVBstoreidx {
+               if v_0.AuxInt != 3 {
                        break
                }
-               _ = x2.Args[3]
-               if ptr != x2.Args[0] {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezero {
                        break
                }
-               if x2_1.AuxInt != 3 {
+               j := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               if idx != x2_1.Args[0] {
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpARM64UBFX {
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVQstorezero [0] {s} p0 mem)
+       for {
+               if v.AuxInt != 8 {
                        break
                }
-               if x2_2.AuxInt != arm64BFAuxInt(24, 8) {
+               s := v.Aux
+               _ = v.Args[1]
+               p0 := v.Args[0]
+               if p0.Op != OpARM64ADD {
                        break
                }
-               if w != x2_2.Args[0] {
+               _ = p0.Args[1]
+               ptr0 := p0.Args[0]
+               idx0 := p0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezeroidx {
                        break
                }
-               mem := x2.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(w)
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = 0
+               v.Aux = s
+               v.AddArg(p0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
+       // match: (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVQstorezero [0] {s} p0 mem)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if v_1.AuxInt != 1 {
-                       break
-               }
-               idx := v_1.Args[0]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpARM64MOVBstoreidx {
+               if v.AuxInt != 8 {
                        break
                }
-               _ = x.Args[3]
-               if ptr != x.Args[0] {
+               s := v.Aux
+               _ = v.Args[1]
+               p0 := v.Args[0]
+               if p0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if idx != x.Args[1] {
+               if p0.AuxInt != 3 {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpARM64UBFX {
+               _ = p0.Args[1]
+               ptr0 := p0.Args[0]
+               idx0 := p0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezeroidx8 {
                        break
                }
-               if x_2.AuxInt != arm64BFAuxInt(8, 8) {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               if w != x_2.Args[0] {
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = 0
+               v.Aux = s
+               v.AddArg(p0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool {
+       // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVDstorezero [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVDstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr)
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c
                v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 8)] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstoreidx ptr idx w mem)
+       // match: (MOVDstorezeroidx ptr (SLLconst [3] idx) mem)
+       // cond:
+       // result: (MOVDstorezeroidx8 ptr idx mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x := v.Args[3]
-               if x.Op != OpARM64MOVBstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               if ptr != x.Args[0] {
-                       break
-               }
-               x_1 := x.Args[1]
-               if x_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x_1.AuxInt != 1 {
-                       break
-               }
-               if idx != x_1.Args[0] {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               x_2 := x.Args[2]
-               if x_2.Op != OpARM64UBFX {
+               if v_1.AuxInt != 3 {
                        break
                }
-               if x_2.AuxInt != arm64BFAuxInt(8, 8) {
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezeroidx (SLLconst [3] idx) ptr mem)
+       // cond:
+       // result: (MOVDstorezeroidx8 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if w != x_2.Args[0] {
+               if v_0.AuxInt != 3 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezeroidx8)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezeroidx8_0(v *Value) bool {
+       // match: (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVDstorezero [c<<3] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = c << 3
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstorezero [off1+off2] {sym} ptr mem)
+       // result: (MOVHUload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -9972,16 +12040,67 @@ func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
+               v.reset(OpARM64MOVHUload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVHUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHUloadidx ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHUloadidx2 ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHUloadidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -9997,183 +12116,372 @@ func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool {
+       // match: (MOVHUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVHUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVHUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUloadidx ptr (SLLconst [1] idx) mem)
+       // cond:
+       // result: (MOVHUloadidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUloadidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHUloadidx ptr (ADD idx idx) mem)
+       // cond:
+       // result: (MOVHUloadidx2 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUloadidx2)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVBstorezeroidx ptr idx mem)
+       // match: (MOVHUloadidx (ADD idx idx) ptr mem)
+       // cond:
+       // result: (MOVHUloadidx2 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
                        break
                }
-               v.reset(OpARM64MOVBstorezeroidx)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVBstorezero {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUloadidx2_0(v *Value) bool {
+       // match: (MOVHUloadidx2 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVHUload [c<<1] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHUload)
+               v.AuxInt = c << 1
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVHstorezeroidx ptr1 idx1 mem)
+       // match: (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
        for {
-               if v.AuxInt != 1 {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx2 {
                        break
                }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVBstorezeroidx {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool {
+       // match: (MOVHUreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
                _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg x:(MOVHUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVBstorezeroidx_0(v *Value) bool {
-       // match: (MOVBstorezeroidx ptr (MOVDconst [c]) mem)
+       // match: (MOVHUreg x:(MOVHUloadidx2 _ _ _))
        // cond:
-       // result: (MOVBstorezero [c] ptr mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstorezeroidx (MOVDconst [c]) idx mem)
+       // match: (MOVHUreg x:(MOVBUreg _))
        // cond:
-       // result: (MOVBstorezero [c] idx mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVBstorezero)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // match: (MOVHUreg x:(MOVHUreg _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUreg {
                        break
                }
-               if v_1.AuxInt != 1 {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg (ANDconst [c] x))
+       // cond:
+       // result: (ANDconst [c&(1<<16-1)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               idx := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVBstorezeroidx {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<16 - 1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHUreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint16(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint16(c))
+               return true
+       }
+       // match: (MOVHUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<16-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
                        break
                }
-               if idx != x.Args[1] {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHUreg_10(v *Value) bool {
+       // match: (MOVHUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<16-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 16)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 16)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDload [off1+off2] {sym} ptr mem)
+       // result: (MOVHload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -10188,16 +12496,16 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDload [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVHload [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDloadidx ptr idx mem)
+       // result: (MOVHloadidx ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -10213,15 +12521,15 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDloadidx)
+               v.reset(OpARM64MOVHloadidx)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // match: (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVDloadidx8 ptr idx mem)
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -10230,7 +12538,7 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
                if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if v_0.AuxInt != 3 {
+               if v_0.AuxInt != 1 {
                        break
                }
                _ = v_0.Args[1]
@@ -10240,15 +12548,15 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDloadidx8)
+               v.reset(OpARM64MOVHloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -10264,14 +12572,14 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
+       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
        // result: (MOVDconst [0])
        for {
@@ -10280,7 +12588,7 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
                _ = v.Args[1]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDstorezero {
+               if v_1.Op != OpARM64MOVHstorezero {
                        break
                }
                off2 := v_1.AuxInt
@@ -10296,10 +12604,10 @@ func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
-       // match: (MOVDloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
+       // match: (MOVHloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDload [c] ptr mem)
+       // result: (MOVHload [c] ptr mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -10309,15 +12617,15 @@ func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVDload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVHloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVDload [c] ptr mem)
+       // result: (MOVHload [c] ptr mem)
        for {
                _ = v.Args[2]
                v_0 := v.Args[0]
@@ -10327,15 +12635,15 @@ func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
                c := v_0.AuxInt
                ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVDload)
+               v.reset(OpARM64MOVHload)
                v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDloadidx ptr (SLLconst [3] idx) mem)
+       // match: (MOVHloadidx ptr (SLLconst [1] idx) mem)
        // cond:
-       // result: (MOVDloadidx8 ptr idx mem)
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -10343,477 +12651,383 @@ func rewriteValueARM64_OpARM64MOVDloadidx_0(v *Value) bool {
                if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if v_1.AuxInt != 1 {
                        break
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVDloadidx8)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDloadidx (SLLconst [3] idx) ptr mem)
-       // cond:
-       // result: (MOVDloadidx8 ptr idx mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               if v_0.AuxInt != 3 {
-                       break
-               }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDloadidx8)
+               v.reset(OpARM64MOVHloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDstorezeroidx {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDloadidx8_0(v *Value) bool {
-       // match: (MOVDloadidx8 ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVDload [c<<3] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDload)
-               v.AuxInt = c << 3
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDstorezeroidx8 {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
-                       break
-               }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDreg_0(v *Value) bool {
-       // match: (MOVDreg x)
-       // cond: x.Uses == 1
-       // result: (MOVDnop x)
-       for {
-               x := v.Args[0]
-               if !(x.Uses == 1) {
-                       break
-               }
-               v.reset(OpARM64MOVDnop)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVDreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVDstore ptr (FMOVDfpgp val) mem)
+       // match: (MOVHloadidx ptr (ADD idx idx) mem)
        // cond:
-       // result: (FMOVDstore ptr val mem)
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64FMOVDfpgp {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64FMOVDstore)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_1.Op != OpARM64ADD {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVDstoreidx ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
                mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
-                       break
-               }
-               v.reset(OpARM64MOVDstoreidx)
+               v.reset(OpARM64MOVHloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVDstoreidx8 ptr idx val mem)
+       // match: (MOVHloadidx (ADD idx idx) ptr mem)
+       // cond:
+       // result: (MOVHloadidx2 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if v_0.AuxInt != 3 {
+               if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx8)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHloadidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHloadidx2_0(v *Value) bool {
+       // match: (MOVHloadidx2 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVDstorezero [off] {sym} ptr mem)
+       // result: (MOVHload [c<<1] ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
-                       break
-               }
+               c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpARM64MOVHload)
+               v.AuxInt = c << 1
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
+       // match: (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHstorezeroidx2 {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstoreidx_0(v *Value) bool {
-       // match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool {
+       // match: (MOVHreg x:(MOVBload _ _))
        // cond:
-       // result: (MOVDstore [c] ptr val mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBload {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDstoreidx (MOVDconst [c]) idx val mem)
+       // match: (MOVHreg x:(MOVBUload _ _))
        // cond:
-       // result: (MOVDstore [c] idx val mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDstoreidx ptr (SLLconst [3] idx) val mem)
+       // match: (MOVHreg x:(MOVHload _ _))
        // cond:
-       // result: (MOVDstoreidx8 ptr idx val mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHload {
                        break
                }
-               if v_1.AuxInt != 3 {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstoreidx8)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDstoreidx (SLLconst [3] idx) ptr val mem)
+       // match: (MOVHreg x:(MOVBUloadidx _ _ _))
        // cond:
-       // result: (MOVDstoreidx8 ptr idx val mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if v_0.AuxInt != 3 {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVHloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstoreidx8)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVDstoreidx ptr idx (MOVDconst [0]) mem)
+       // match: (MOVHreg x:(MOVHloadidx2 _ _ _))
        // cond:
-       // result: (MOVDstorezeroidx ptr idx mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx2 {
                        break
                }
-               if v_2.AuxInt != 0 {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
+                       break
+               }
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVHreg x:(MOVHreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstoreidx8_0(v *Value) bool {
-       // match: (MOVDstoreidx8 ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64MOVHreg_10(v *Value) bool {
+       // match: (MOVHreg (MOVDconst [c]))
        // cond:
-       // result: (MOVDstore [c<<3] ptr val mem)
+       // result: (MOVDconst [int64(int16(c))])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = c << 3
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int16(c))
                return true
        }
-       // match: (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVDstorezeroidx8 ptr idx mem)
+       // match: (MOVHreg (SLLconst [lc] x))
+       // cond: lc < 16
+       // result: (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if v_2.AuxInt != 0 {
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < 16) {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVDstorezeroidx8)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 16-lc)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstorezero [off1+off2] {sym} ptr mem)
+       // result: (MOVHstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHstoreidx ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVHstoreidx2 ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDaddr {
                        break
@@ -10821,332 +13035,371 @@ func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVDstorezeroidx ptr idx mem)
+       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVHstorezero [off] {sym} ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_1.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVDstorezeroidx8 ptr idx mem)
+       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHreg {
                        break
                }
-               if v_0.AuxInt != 3 {
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHUreg {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx8)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVHstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
                i := v.AuxInt
                s := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVDstorezero {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
+                       break
+               }
+               if x.AuxInt != i-2 {
                        break
                }
-               j := x.AuxInt
                if x.Aux != s {
                        break
                }
-               _ = x.Args[1]
+               _ = x.Args[2]
                ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if w != x.Args[1] {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = min(i, j)
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
                v.Aux = s
                v.AddArg(ptr0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
        // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVQstorezero [0] {s} p0 mem)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
-               if v.AuxInt != 8 {
+               if v.AuxInt != 2 {
                        break
                }
                s := v.Aux
-               _ = v.Args[1]
-               p0 := v.Args[0]
-               if p0.Op != OpARM64ADD {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = p0.Args[1]
-               ptr0 := p0.Args[0]
-               idx0 := p0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVDstorezeroidx {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[2]
+               if v_1.AuxInt != 16 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
                ptr1 := x.Args[0]
                idx1 := x.Args[1]
-               mem := x.Args[2]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
                if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = 0
-               v.Aux = s
-               v.AddArg(p0)
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
        // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVQstorezero [0] {s} p0 mem)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
        for {
-               if v.AuxInt != 8 {
+               if v.AuxInt != 2 {
                        break
                }
                s := v.Aux
-               _ = v.Args[1]
-               p0 := v.Args[0]
-               if p0.Op != OpARM64ADDshiftLL {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if p0.AuxInt != 3 {
+               if v_0.AuxInt != 1 {
                        break
                }
-               _ = p0.Args[1]
-               ptr0 := p0.Args[0]
-               idx0 := p0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVDstorezeroidx8 {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               if v_1.AuxInt != 16 {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = 0
-               v.Aux = s
-               v.AddArg(p0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDstorezeroidx_0(v *Value) bool {
-       // match: (MOVDstorezeroidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVDstorezero [c] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstorezeroidx (MOVDconst [c]) idx mem)
-       // cond:
-       // result: (MOVDstorezero [c] idx mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = c
-               v.AddArg(idx)
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDstorezeroidx ptr (SLLconst [3] idx) mem)
-       // cond:
-       // result: (MOVDstorezeroidx8 ptr idx mem)
+       // match: (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               ptr0 := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezeroidx8)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVDstorezeroidx (SLLconst [3] idx) ptr mem)
-       // cond:
-       // result: (MOVDstorezeroidx8 ptr idx mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               if v_0.AuxInt != 3 {
+               if x.AuxInt != i-2 {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezeroidx8)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVDstorezeroidx8_0(v *Value) bool {
-       // match: (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVDstorezero [c<<3] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if x.Aux != s {
                        break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = c << 3
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHUload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHUloadidx ptr idx mem)
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               v.reset(OpARM64MOVHUloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHUloadidx2 ptr idx mem)
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
@@ -11155,454 +13408,303 @@ func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64UBFX {
                        break
                }
-               v.reset(OpARM64MOVHUloadidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
                        break
                }
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHstorezero {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUloadidx_0(v *Value) bool {
-       // match: (MOVHUloadidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVHUload [c] ptr mem)
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w mem)
        for {
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               ptr0 := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUloadidx (MOVDconst [c]) ptr mem)
-       // cond:
-       // result: (MOVHUload [c] ptr mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1.AuxInt != 16 {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUloadidx ptr (SLLconst [1] idx) mem)
-       // cond:
-       // result: (MOVHUloadidx2 ptr idx mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               if v_1.AuxInt != 1 {
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHUloadidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUloadidx ptr (ADD idx idx) mem)
-       // cond:
-       // result: (MOVHUloadidx2 ptr idx mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADD {
+               if x.AuxInt != i-2 {
                        break
                }
-               _ = v_1.Args[1]
-               idx := v_1.Args[0]
-               if idx != v_1.Args[1] {
+               if x.Aux != s {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHUloadidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUloadidx (ADD idx idx) ptr mem)
-       // cond:
-       // result: (MOVHUloadidx2 ptr idx mem)
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w mem)
        for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = v_0.Args[1]
-               idx := v_0.Args[0]
-               if idx != v_0.Args[1] {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if v_1.AuxInt != 16 {
                        break
                }
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHUloadidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHstorezeroidx {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUloadidx2_0(v *Value) bool {
-       // match: (MOVHUloadidx2 ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVHUload [c<<1] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHUload)
-               v.AuxInt = c << 1
-               v.AddArg(ptr)
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHstorezeroidx2 {
+               if v.AuxInt != 2 {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+               s := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUreg_0(v *Value) bool {
-       // match: (MOVHUreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               if v_0.AuxInt != 1 {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVHUload _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               if v_1.AuxInt != 16 {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVHUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVDreg {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVHUloadidx2 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx2 {
+               w := v_1_0.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg x:(MOVHUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUreg {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHUreg (ANDconst [c] x))
-       // cond:
-       // result: (ANDconst [c&(1<<16-1)] x)
+       // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstore [i-2] {s} ptr0 w0 mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<16 - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHUreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint16(c))])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstore {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint16(c))
-               return true
-       }
-       // match: (MOVHUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<16-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if x.AuxInt != i-2 {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
+               if x.Aux != s {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHUreg_10(v *Value) bool {
-       // match: (MOVHUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<16-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 16)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
+               if w0.AuxInt != j-16 {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 16)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if w != w0.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHloadidx ptr idx mem)
+       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstoreidx ptr1 idx1 w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVHloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-16 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVHloadidx2 ptr idx mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstore_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
@@ -11611,110 +13713,93 @@ func rewriteValueARM64_OpARM64MOVHload_0(v *Value) bool {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVHloadidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVHstoreidx2 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVHload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHstorezero {
+               if w0.AuxInt != j-16 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if w != w0.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
-       // match: (MOVHloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVHstoreidx_0(v *Value) bool {
+       // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVHload [c] ptr mem)
+       // result: (MOVHstore [c] ptr val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHload)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = c
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVHstoreidx (MOVDconst [c]) idx val mem)
        // cond:
-       // result: (MOVHload [c] ptr mem)
+       // result: (MOVHstore [c] idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHload)
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
                v.AuxInt = c
-               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx ptr (SLLconst [1] idx) mem)
+       // match: (MOVHstoreidx ptr (SLLconst [1] idx) val mem)
        // cond:
-       // result: (MOVHloadidx2 ptr idx mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64SLLconst {
@@ -11724,18 +13809,20 @@ func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
                        break
                }
                idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHloadidx2)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx ptr (ADD idx idx) mem)
+       // match: (MOVHstoreidx ptr (ADD idx idx) val mem)
        // cond:
-       // result: (MOVHloadidx2 ptr idx mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64ADD {
@@ -11746,18 +13833,43 @@ func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
                if idx != v_1.Args[1] {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHloadidx2)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstoreidx (SLLconst [1] idx) ptr val mem)
+       // cond:
+       // result: (MOVHstoreidx2 ptr idx val mem)
+       for {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx (ADD idx idx) ptr mem)
+       // match: (MOVHstoreidx (ADD idx idx) ptr val mem)
        // cond:
-       // result: (MOVHloadidx2 ptr idx mem)
+       // result: (MOVHstoreidx2 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
@@ -11768,280 +13880,353 @@ func rewriteValueARM64_OpARM64MOVHloadidx_0(v *Value) bool {
                        break
                }
                ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHloadidx2)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHstorezeroidx {
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHloadidx2_0(v *Value) bool {
-       // match: (MOVHloadidx2 ptr (MOVDconst [c]) mem)
+       // match: (MOVHstoreidx ptr idx (MOVHreg x) mem)
        // cond:
-       // result: (MOVHload [c<<1] ptr mem)
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHload)
-               v.AuxInt = c << 1
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
                v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
+       // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHstorezeroidx2 {
-                       break
-               }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+               if v_2.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHreg_0(v *Value) bool {
-       // match: (MOVHreg x:(MOVBload _ _))
+       // match: (MOVHstoreidx ptr idx (MOVWreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVBUload _ _))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstoreidx_10(v *Value) bool {
+       // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHstoreidx ptr idx x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx ptr idx w mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHload {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBloadidx {
+               if v_1.AuxInt != 2 {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               idx := v_1.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64SRLconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVHreg x:(MOVHloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx {
+               if v_2.AuxInt != 16 {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVHstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVHloadidx2 _ _ _))
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstoreidx2_0(v *Value) bool {
+       // match: (MOVHstoreidx2 ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHstore [c<<1] ptr val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx2 {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = c << 1
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVBreg _))
+       // match: (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               if v_2.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstorezeroidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVBUreg _))
+       // match: (MOVHstoreidx2 ptr idx (MOVHreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHreg {
                        break
                }
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg x:(MOVHreg _))
+       // match: (MOVHstoreidx2 ptr idx (MOVHUreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVHUreg {
                        break
                }
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHreg_10(v *Value) bool {
-       // match: (MOVHreg (MOVDconst [c]))
+       // match: (MOVHstoreidx2 ptr idx (MOVWreg x) mem)
        // cond:
-       // result: (MOVDconst [int64(int16(c))])
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int16(c))
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVHreg (SLLconst [lc] x))
-       // cond: lc < 16
-       // result: (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
+       // match: (MOVHstoreidx2 ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVHstoreidx2 ptr idx x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 16) {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 16-lc)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVHstoreidx2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
+       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVHstorezero [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVHstore)
+               v.reset(OpARM64MOVHstorezero)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} (ADD ptr idx) val mem)
+       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHstoreidx ptr idx val mem)
+       // result: (MOVHstorezeroidx ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
@@ -12049,25 +14234,23 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx)
+               v.reset(OpARM64MOVHstorezeroidx)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem)
+       // match: (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
@@ -12078,1224 +14261,1418 @@ func rewriteValueARM64_OpARM64MOVHstore_0(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstoreidx2)
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVHstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVWstorezeroidx ptr1 idx1 mem)
+       for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
+       for {
+               if v.AuxInt != 2 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVHstorezeroidx2 {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx1)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
+       // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVHstorezero [off] {sym} ptr mem)
+       // result: (MOVHstorezero [c] ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVHstorezero [c] idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
+               c := v_0.AuxInt
+               idx := v.Args[1]
                mem := v.Args[2]
                v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.AuxInt = c
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
+       // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem)
        // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHreg {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               x := v_1.Args[0]
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg(ptr)
-               v.AddArg(x)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVHUreg x) mem)
+       // match: (MOVHstorezeroidx ptr (ADD idx idx) mem)
        // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVHUreg {
+               if v_1.Op != OpARM64ADD {
+                       break
+               }
+               _ = v_1.Args[1]
+               idx := v_1.Args[0]
+               if idx != v_1.Args[1] {
                        break
                }
-               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg(ptr)
-               v.AddArg(x)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVWreg x) mem)
+       // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem)
        // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               x := v_1.Args[0]
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg(ptr)
-               v.AddArg(x)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [off] {sym} ptr (MOVWUreg x) mem)
+       // match: (MOVHstorezeroidx (ADD idx idx) ptr mem)
        // cond:
-       // result: (MOVHstore [off] {sym} ptr x mem)
+       // result: (MOVHstorezeroidx2 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               x := v_1.Args[0]
+               _ = v_0.Args[1]
+               idx := v_0.Args[0]
+               if idx != v_0.Args[1] {
+                       break
+               }
+               ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpARM64MOVHstorezeroidx2)
                v.AddArg(ptr)
-               v.AddArg(x)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstorezeroidx ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               if v_1.AuxInt != 16 {
+               if v_1.AuxInt != 2 {
                        break
                }
-               w := v_1.Args[0]
+               idx := v_1.Args[0]
                x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
+               if x.Op != OpARM64MOVHstorezeroidx {
                        break
                }
-               if x.Aux != s {
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               if idx != x.Args[1] {
                        break
                }
                mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstore_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+func rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v *Value) bool {
+       // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVHstorezero [c<<1] ptr mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = c << 1
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if v_0.AuxInt != 1 {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx2 {
-                       break
-               }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(w)
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               if x.Aux != s {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWUloadidx ptr idx mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
+               v.reset(OpARM64MOVWUloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWUloadidx4 ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               if v_0.AuxInt != 2 {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
+               v.reset(OpARM64MOVWUloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
+       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               if v_0.AuxInt != 1 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64UBFX {
-                       break
-               }
-               if v_1.AuxInt != arm64BFAuxInt(16, 16) {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx2 {
+               if v_1.Op != OpARM64MOVWstorezero {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
+       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(w)
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
+       // cond:
+       // result: (MOVWUload [c] ptr mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w mem)
+       // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem)
+       // cond:
+       // result: (MOVWUloadidx4 ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
                _ = v.Args[2]
-               ptr0 := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
-                       break
-               }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if x.Aux != s {
+               if v_1.AuxInt != 2 {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVWUloadidx4 ptr idx mem)
+       for {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w mem)
+       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               if v_1.AuxInt != 16 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool {
+       // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWUload [c<<2] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWUload)
+               v.AuxInt = c << 2
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx4 {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
+       // match: (MOVWUreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
-               v.AddArg(mem)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
+       // match: (MOVWUreg x:(MOVWUload _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               if v.AuxInt != 2 {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUload {
                        break
                }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if v_0.AuxInt != 1 {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUloadidx {
                        break
                }
-               if v_1.AuxInt != 16 {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpARM64MOVDreg {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUloadidx4 {
                        break
                }
-               w := v_1_0.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx2 {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg x:(MOVHUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUreg {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
+       // match: (MOVWUreg x:(MOVWUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstore [i-2] {s} ptr0 w0 mem)
+       // match: (MOVWUreg (ANDconst [c] x))
+       // cond:
+       // result: (ANDconst [c&(1<<32-1)] x)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstore {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c & (1<<32 - 1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(uint32(c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint32(c))
+               return true
+       }
+       // match: (MOVWUreg (SLLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, sc)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if x.Aux != s {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWUreg (SRLconst [sc] x))
+       // cond: isARM64BFMask(sc, 1<<32-1, 0)
+       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               if w0.AuxInt != j-16 {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, 32)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstoreidx ptr1 idx1 w0 mem)
+       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWloadidx ptr idx mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
-                       break
-               }
-               if w0.AuxInt != j-16 {
-                       break
-               }
-               if w != w0.Args[0] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w0)
+               v.reset(OpARM64MOVWloadidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstore_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
+       // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if v_0.AuxInt != 1 {
+               if v_0.AuxInt != 2 {
                        break
                }
                _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstoreidx2 {
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               v.reset(OpARM64MOVWloadidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               if w0.AuxInt != j-16 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[1]
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstoreidx_0(v *Value) bool {
-       // match: (MOVHstoreidx ptr (MOVDconst [c]) val mem)
+func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
+       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVHstore [c] ptr val mem)
+       // result: (MOVWload [c] ptr mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstore)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
                v.AuxInt = c
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx (MOVDconst [c]) idx val mem)
+       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
        // cond:
-       // result: (MOVHstore [c] idx val mem)
+       // result: (MOVWload [c] ptr mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstore)
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
                v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr (SLLconst [1] idx) val mem)
+       // match: (MOVWloadidx ptr (SLLconst [2] idx) mem)
        // cond:
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if v_1.AuxInt != 2 {
                        break
                }
                idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr (ADD idx idx) val mem)
+       // match: (MOVWloadidx (SLLconst [2] idx) ptr mem)
        // cond:
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // result: (MOVWloadidx4 ptr idx mem)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADD {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               _ = v_1.Args[1]
-               idx := v_1.Args[0]
-               if idx != v_1.Args[1] {
+               if v_0.AuxInt != 2 {
                        break
                }
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWloadidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx (SLLconst [1] idx) ptr val mem)
-       // cond:
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
+       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx {
                        break
                }
-               if v_0.AuxInt != 1 {
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWloadidx4_0(v *Value) bool {
+       // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWload [c<<2] ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWload)
+               v.AuxInt = c << 2
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx (ADD idx idx) ptr val mem)
+       // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
+       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWstorezeroidx4 {
+                       break
+               }
+               _ = v_2.Args[2]
+               ptr2 := v_2.Args[0]
+               idx2 := v_2.Args[1]
+               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
+       // match: (MOVWreg x:(MOVBload _ _))
        // cond:
-       // result: (MOVHstoreidx2 ptr idx val mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBload {
                        break
                }
-               _ = v_0.Args[1]
-               idx := v_0.Args[0]
-               if idx != v_0.Args[1] {
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUload {
                        break
                }
-               ptr := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVDconst [0]) mem)
+       // match: (MOVWreg x:(MOVHload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVHUload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVWload _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWload {
+                       break
+               }
+               _ = x.Args[1]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBloadidx _ _ _))
        // cond:
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBloadidx {
                        break
                }
-               if v_2.AuxInt != 0 {
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUloadidx {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVHreg x) mem)
+       // match: (MOVWreg x:(MOVHloadidx _ _ _))
        // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHreg {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVHUreg x) mem)
+       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
        // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHUreg {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr idx (MOVWreg x) mem)
+       // match: (MOVWreg x:(MOVWloadidx _ _ _))
        // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWloadidx {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstoreidx_10(v *Value) bool {
-       // match: (MOVHstoreidx ptr idx (MOVWUreg x) mem)
+func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
+       // match: (MOVWreg x:(MOVHloadidx2 _ _ _))
        // cond:
-       // result: (MOVHstoreidx ptr idx x mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHloadidx2 {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx ptr idx w mem)
+       // match: (MOVWreg x:(MOVHUloadidx2 _ _ _))
+       // cond:
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpARM64MOVHstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               if ptr != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstoreidx2_0(v *Value) bool {
-       // match: (MOVHstoreidx2 ptr (MOVDconst [c]) val mem)
+       // match: (MOVWreg x:(MOVWloadidx4 _ _ _))
        // cond:
-       // result: (MOVHstore [c<<1] ptr val mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWloadidx4 {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstore)
-               v.AuxInt = c << 1
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               _ = x.Args[2]
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem)
+       // match: (MOVWreg x:(MOVBreg _))
        // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBreg {
                        break
                }
-               if v_2.AuxInt != 0 {
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVWreg x:(MOVBUreg _))
+       // cond:
+       // result: (MOVDreg x)
+       for {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVBUreg {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDreg)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVHreg x) mem)
+       // match: (MOVWreg x:(MOVHreg _))
        // cond:
-       // result: (MOVHstoreidx2 ptr idx x mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHreg {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVHUreg x) mem)
+       // match: (MOVWreg x:(MOVHreg _))
        // cond:
-       // result: (MOVHstoreidx2 ptr idx x mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVHUreg {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVHreg {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVWreg x) mem)
+       // match: (MOVWreg x:(MOVWreg _))
        // cond:
-       // result: (MOVHstoreidx2 ptr idx x mem)
+       // result: (MOVDreg x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               x := v.Args[0]
+               if x.Op != OpARM64MOVWreg {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               v.reset(OpARM64MOVDreg)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVHstoreidx2 ptr idx (MOVWUreg x) mem)
+       // match: (MOVWreg (MOVDconst [c]))
        // cond:
-       // result: (MOVHstoreidx2 ptr idx x mem)
+       // result: (MOVDconst [int64(int32(c))])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVHstoreidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c))
+               return true
+       }
+       // match: (MOVWreg (SLLconst [lc] x))
+       // cond: lc < 32
+       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < 32) {
+                       break
+               }
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
        b := v.Block
        _ = b
        config := b.Func.Config
        _ = config
-       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstorezero [off1+off2] {sym} ptr mem)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               v.reset(OpARM64MOVHstorezero)
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
-                       break
-               }
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstorezero [off] {sym} (ADD ptr idx) mem)
+       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHstorezeroidx ptr idx mem)
+       // result: (MOVWstoreidx ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
@@ -13303,443 +15680,392 @@ func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx)
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem)
+       // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
        // cond: off == 0 && sym == nil
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               if v_0.AuxInt != 1 {
+               if v_0.AuxInt != 2 {
                        break
                }
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVHstorezeroidx2)
+               v.reset(OpARM64MOVWstoreidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVHstorezero {
-                       break
-               }
-               j := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVWstorezeroidx ptr1 idx1 mem)
-       for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVHstorezeroidx {
-                       break
-               }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
-       for {
-               if v.AuxInt != 2 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if v_0.AuxInt != 1 {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVHstorezeroidx2 {
-                       break
-               }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
-                       break
-               }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstorezeroidx_0(v *Value) bool {
-       // match: (MOVHstorezeroidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVHstorezero [c] ptr mem)
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               c := v_1.AuxInt
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVHstorezeroidx (MOVDconst [c]) idx mem)
-       // cond:
-       // result: (MOVHstorezero [c] idx mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = c
-               v.AddArg(idx)
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (SLLconst [1] idx) mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVWstorezero [off] {sym} ptr mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if v_1.AuxInt != 0 {
                        break
                }
-               idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (ADD idx idx) mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
        // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_1.Args[1]
-               idx := v_1.Args[0]
-               if idx != v_1.Args[1] {
+               if v_1.Op != OpARM64MOVWreg {
                        break
                }
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx (SLLconst [1] idx) ptr mem)
+       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
        // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // result: (MOVWstore [off] {sym} ptr x mem)
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               if v_0.AuxInt != 1 {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx (ADD idx idx) ptr mem)
-       // cond:
-       // result: (MOVHstorezeroidx2 ptr idx mem)
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w mem)
        for {
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = v_0.Args[1]
-               idx := v_0.Args[0]
-               if idx != v_0.Args[1] {
+               if v_1.AuxInt != 32 {
                        break
                }
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezeroidx2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstorezeroidx ptr idx mem)
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w mem)
        for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               if v_1.AuxInt != 2 {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               idx := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVHstorezeroidx {
+               if v_1.AuxInt != 32 {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
                        break
                }
-               if idx != x.Args[1] {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVHstorezeroidx2_0(v *Value) bool {
-       // match: (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVHstorezero [c<<1] ptr mem)
+       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
        for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVHstorezero)
-               v.AuxInt = c << 1
-               v.AddArg(ptr)
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx4 {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {sym} ptr mem)
+       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr0 := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstore {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if x.AuxInt != i-4 {
                        break
                }
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               if x.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               w0 := x.Args[1]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWUloadidx ptr idx mem)
+       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADD {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVWUloadidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64ADDshiftLL {
                        break
@@ -13748,556 +16074,656 @@ func rewriteValueARM64_OpARM64MOVWUload_0(v *Value) bool {
                        break
                }
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVWUloadidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstoreidx4 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = x.Args[3]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               w0 := x.Args[2]
+               if w0.Op != OpARM64SRLconst {
                        break
                }
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool {
+       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
+       // cond:
+       // result: (MOVWstore [c] ptr val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWstorezero {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               c := v_1.AuxInt
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
+       // cond:
+       // result: (MOVWstore [c] idx val mem)
+       for {
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = c
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUloadidx_0(v *Value) bool {
-       // match: (MOVWUloadidx ptr (MOVDconst [c]) mem)
+       // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem)
        // cond:
-       // result: (MOVWUload [c] ptr mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = c
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
                v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx (MOVDconst [c]) ptr mem)
+       // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem)
        // cond:
-       // result: (MOVWUload [c] ptr mem)
+       // result: (MOVWstoreidx4 ptr idx val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_0.AuxInt
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
                ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
-               v.AuxInt = c
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
                v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx ptr (SLLconst [2] idx) mem)
+       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
        // cond:
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // result: (MOVWstorezeroidx ptr idx mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if v_2.AuxInt != 0 {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWUloadidx4)
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstorezeroidx)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx (SLLconst [2] idx) ptr mem)
+       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
        // cond:
-       // result: (MOVWUloadidx4 ptr idx mem)
+       // result: (MOVWstoreidx ptr idx x mem)
        for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               if v_0.AuxInt != 2 {
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstoreidx ptr idx x mem)
+       for {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWUloadidx4)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx)
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVDstoreidx ptr idx w mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
-               idx := v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if v_1.AuxInt != 4 {
+                       break
+               }
+               idx := v_1.Args[0]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx {
+               if v_2.Op != OpARM64SRLconst {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               if v_2.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpARM64MOVWstoreidx {
+                       break
+               }
+               _ = x.Args[3]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstoreidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWUloadidx4_0(v *Value) bool {
-       // match: (MOVWUloadidx4 ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVWstoreidx4_0(v *Value) bool {
+       // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem)
        // cond:
-       // result: (MOVWUload [c<<2] ptr mem)
+       // result: (MOVWstore [c<<2] ptr val mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWUload)
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstore)
                v.AuxInt = c << 2
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
+       // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx4 {
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+               if v_2.AuxInt != 0 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUreg_0(v *Value) bool {
-       // match: (MOVWUreg x:(MOVBUload _ _))
+       // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVWstoreidx4 ptr idx x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWreg {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUload _ _))
+       // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVWstoreidx4 ptr idx x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVWUreg {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               x := v_2.Args[0]
+               mem := v.Args[3]
+               v.reset(OpARM64MOVWstoreidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVWUload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWUload {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg x:(MOVWUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWUloadidx {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUloadidx2 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx2 {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg x:(MOVWUloadidx4 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWUloadidx4 {
+               if v_0.AuxInt != 2 {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg x:(MOVBUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(off == 0 && sym == nil) {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64MOVWstorezeroidx4)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg x:(MOVHUreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUreg {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezero {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUreg_10(v *Value) bool {
-       // match: (MOVWUreg x:(MOVWUreg _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWUreg {
+               j := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWUreg (ANDconst [c] x))
-       // cond:
-       // result: (ANDconst [c&(1<<32-1)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c & (1<<32 - 1)
-               v.AddArg(x)
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(uint32(c))])
+       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
+       // result: (MOVDstorezeroidx ptr1 idx1 mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v.AuxInt != 4 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint32(c))
-               return true
-       }
-       // match: (MOVWUreg (SLLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, sc)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
-       for {
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64ADD {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezeroidx {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
-               v.AddArg(x)
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr1)
+               v.AddArg(idx1)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWUreg (SRLconst [sc] x))
-       // cond: isARM64BFMask(sc, 1<<32-1, 0)
-       // result: (UBFX [arm64BFAuxInt(sc, 32)] x)
+       // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
+       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
+       // result: (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
        for {
+               if v.AuxInt != 4 {
+                       break
+               }
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if v_0.Op != OpARM64ADDshiftLL {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, 32)
-               v.AddArg(x)
+               _ = v_0.Args[1]
+               ptr0 := v_0.Args[0]
+               idx0 := v_0.Args[1]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezeroidx4 {
+                       break
+               }
+               _ = x.Args[2]
+               ptr1 := x.Args[0]
+               idx1 := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr1)
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx1)
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWload_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
+func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool {
+       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
+       // cond:
+       // result: (MOVWstorezero [c] ptr mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWloadidx ptr idx mem)
+       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
+       // cond:
+       // result: (MOVWstorezero [c] idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVWloadidx)
-               v.AddArg(ptr)
+               c := v_0.AuxInt
+               idx := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWloadidx4 ptr idx mem)
+       // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if v_0.AuxInt != 2 {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64MOVWloadidx4)
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezeroidx4)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVWstorezeroidx4 ptr idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezeroidx4)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVDconst [0])
+       // match: (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVDstorezeroidx ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWstorezero {
+               if v_1.Op != OpARM64ADDconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[1]
-               ptr2 := v_1.Args[0]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               if v_1.AuxInt != 4 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               idx := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVWstorezeroidx {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezeroidx)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
-       // match: (MOVWloadidx ptr (MOVDconst [c]) mem)
+func rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v *Value) bool {
+       // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem)
        // cond:
-       // result: (MOVWload [c] ptr mem)
+       // result: (MOVWstorezero [c<<2] ptr mem)
        for {
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14307,2852 +16733,2271 @@ func rewriteValueARM64_OpARM64MOVWloadidx_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = c
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = c << 2
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx (MOVDconst [c]) ptr mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
+       // match: (MUL (NEG x) y)
        // cond:
-       // result: (MOVWload [c] ptr mem)
+       // result: (MNEG x y)
        for {
-               _ = v.Args[2]
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64NEG {
+                       break
+               }
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MUL y (NEG x))
+       // cond:
+       // result: (MNEG x y)
+       for {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64NEG {
+                       break
+               }
+               x := v_1.Args[0]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (MUL x (MOVDconst [-1]))
+       // cond:
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [-1]) x)
+       // cond:
+       // result: (NEG x)
+       for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWloadidx ptr (SLLconst [2] idx) mem)
+       // match: (MUL _ (MOVDconst [0]))
        // cond:
-       // result: (MOVWloadidx4 ptr idx mem)
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if v_1.AuxInt != 0 {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWloadidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVWloadidx (SLLconst [2] idx) ptr mem)
+       // match: (MUL (MOVDconst [0]) _)
        // cond:
-       // result: (MOVWloadidx4 ptr idx mem)
+       // result: (MOVDconst [0])
        for {
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               if v_0.AuxInt != 0 {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWloadidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL x (MOVDconst [1]))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
-       // cond: (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2))
-       // result: (MOVDconst [0])
+       // match: (MUL (MOVDconst [1]) x)
+       // cond:
+       // result: x
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWloadidx4_0(v *Value) bool {
-       // match: (MOVWloadidx4 ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVWload [c<<2] ptr mem)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWload)
-               v.AuxInt = c << 2
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _))
-       // cond: isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)
-       // result: (MOVDconst [0])
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
        for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWstorezeroidx4 {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_2.Args[2]
-               ptr2 := v_2.Args[0]
-               idx2 := v_2.Args[1]
-               if !(isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWreg_0(v *Value) bool {
-       // match: (MOVWreg x:(MOVBload _ _))
-       // cond:
-       // result: (MOVDreg x)
+func rewriteValueARM64_OpARM64MUL_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVBload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg x:(MOVBUload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUload {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg x:(MOVHload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVHload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg x:(MOVHUload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUload {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg x:(MOVWload _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVWload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[1]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWreg x:(MOVBloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBloadidx {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWreg x:(MOVBUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVBUloadidx {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWreg x:(MOVHloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWreg x:(MOVHUloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWreg x:(MOVWloadidx _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWloadidx {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWreg_10(v *Value) bool {
-       // match: (MOVWreg x:(MOVHloadidx2 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+func rewriteValueARM64_OpARM64MUL_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVHloadidx2 {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWreg x:(MOVHUloadidx2 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
-       for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHUloadidx2 {
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWreg x:(MOVWloadidx4 _ _ _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVWloadidx4 {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[2]
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWreg x:(MOVBreg _))
+       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDconst [c*d])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
                return true
        }
-       // match: (MOVWreg x:(MOVBUreg _))
+       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MOVDconst [c*d])
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVBUreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
-               v.AddArg(x)
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
                return true
        }
-       // match: (MOVWreg x:(MOVHreg _))
+       return false
+}
+func rewriteValueARM64_OpARM64MULW_0(v *Value) bool {
+       // match: (MULW (NEG x) y)
        // cond:
-       // result: (MOVDreg x)
+       // result: (MNEGW x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64NEG {
                        break
                }
-               v.reset(OpARM64MOVDreg)
+               x := v_0.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64MNEGW)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWreg x:(MOVHreg _))
+       // match: (MULW y (NEG x))
        // cond:
-       // result: (MOVDreg x)
+       // result: (MNEGW x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpARM64MOVHreg {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64NEG {
                        break
                }
-               v.reset(OpARM64MOVDreg)
+               x := v_1.Args[0]
+               v.reset(OpARM64MNEGW)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWreg x:(MOVWreg _))
-       // cond:
-       // result: (MOVDreg x)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==-1
+       // result: (NEG x)
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpARM64MOVWreg {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDreg)
+               c := v_1.AuxInt
+               if !(int32(c) == -1) {
+                       break
+               }
+               v.reset(OpARM64NEG)
                v.AddArg(x)
                return true
        }
-       // match: (MOVWreg (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c))])
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (NEG x)
        for {
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c))
-               return true
-       }
-       // match: (MOVWreg (SLLconst [lc] x))
-       // cond: lc < 32
-       // result: (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < 32) {
+               x := v.Args[1]
+               if !(int32(c) == -1) {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc, 32-lc)
+               v.reset(OpARM64NEG)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstore_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // match: (MULW _ (MOVDconst [c]))
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVWstore [off] {sym} (ADD ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstoreidx ptr idx val mem)
+       // match: (MULW (MOVDconst [c]) _)
+       // cond: int32(c)==0
+       // result: (MOVDconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               c := v_0.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstoreidx4 ptr idx val mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==1
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if v_0.AuxInt != 2 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(off == 0 && sym == nil) {
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
                        break
                }
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==1
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == 1) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVWstorezero [off] {sym} ptr mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MULW_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w mem)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 32 {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               if x.AuxInt != i-4 {
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x.Aux != s {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               if w != x.Args[1] {
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
-               if v.AuxInt != 4 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               if v_1.AuxInt != 32 {
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
-               if v.AuxInt != 4 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_1.AuxInt != 32 {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx4 {
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               if w != x.Args[2] {
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx1)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
                v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWstore_10(v *Value) bool {
+func rewriteValueARM64_OpARM64MULW_20(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstore [i-4] {s} ptr0 w0 mem)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               ptr0 := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstore {
-                       break
-               }
-               if x.AuxInt != i-4 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               w0 := x.Args[1]
-               if w0.Op != OpARM64SRLconst {
-                       break
-               }
-               if w0.AuxInt != j-32 {
-                       break
-               }
-               if w != w0.Args[0] {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDstore)
-               v.AuxInt = i - 4
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstoreidx ptr1 idx1 w0 mem)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
-               if v.AuxInt != 4 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if w0.AuxInt != j-32 {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if w != w0.Args[0] {
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) * int32(d))
+               return true
+       }
+       // match: (MULW (MOVDconst [d]) (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               d := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(int32(c) * int32(d))
                return true
        }
-       // match: (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
+       return false
+}
+func rewriteValueARM64_OpARM64MVN_0(v *Value) bool {
+       // match: (MVN (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [^c])
        for {
-               if v.AuxInt != 4 {
-                       break
-               }
-               s := v.Aux
-               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NEG_0(v *Value) bool {
+       // match: (NEG (MUL x y))
+       // cond:
+       // result: (MNEG x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MUL {
                        break
                }
                _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64MNEG)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (NEG (MULW x y))
+       // cond:
+       // result: (MNEGW x y)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MULW {
                        break
                }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstoreidx4 {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64MNEGW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (NEG (MOVDconst [c]))
+       // cond:
+       // result: (MOVDconst [-c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x.Args[3]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               w0 := x.Args[2]
-               if w0.Op != OpARM64SRLconst {
+               c := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -c
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64NotEqual_0(v *Value) bool {
+       // match: (NotEqual (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagEQ {
                        break
                }
-               if w0.AuxInt != j-32 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (NotEqual (FlagLT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_ULT {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagLT_UGT {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (FlagGT_ULT))
+       // cond:
+       // result: (MOVDconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_ULT {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstoreidx_0(v *Value) bool {
-       // match: (MOVWstoreidx ptr (MOVDconst [c]) val mem)
+       // match: (NotEqual (FlagGT_UGT))
        // cond:
-       // result: (MOVWstore [c] ptr val mem)
+       // result: (MOVDconst [1])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64FlagGT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (MOVWstoreidx (MOVDconst [c]) idx val mem)
+       // match: (NotEqual (InvertFlags x))
        // cond:
-       // result: (MOVWstore [c] idx val mem)
+       // result: (NotEqual x)
        for {
-               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64InvertFlags {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpARM64NotEqual)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx ptr (SLLconst [2] idx) val mem)
+       return false
+}
+func rewriteValueARM64_OpARM64OR_0(v *Value) bool {
+       // match: (OR x (MOVDconst [c]))
        // cond:
-       // result: (MOVWstoreidx4 ptr idx val mem)
+       // result: (ORconst [c] x)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx (SLLconst [2] idx) ptr val mem)
+       // match: (OR (MOVDconst [c]) x)
        // cond:
-       // result: (MOVWstoreidx4 ptr idx val mem)
+       // result: (ORconst [c] x)
        for {
-               _ = v.Args[3]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               if v_0.AuxInt != 2 {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx ptr idx (MOVDconst [0]) mem)
+       // match: (OR x x)
        // cond:
-       // result: (MOVWstorezeroidx ptr idx mem)
+       // result: x
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
-                       break
-               }
-               if v_2.AuxInt != 0 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVWstoreidx ptr idx (MOVWreg x) mem)
+       // match: (OR x (MVN y))
        // cond:
-       // result: (MOVWstoreidx ptr idx x mem)
+       // result: (ORN x y)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MVN {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               y := v_1.Args[0]
+               v.reset(OpARM64ORN)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx ptr idx (MOVWUreg x) mem)
+       // match: (OR (MVN y) x)
        // cond:
-       // result: (MOVWstoreidx ptr idx x mem)
+       // result: (ORN x y)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MVN {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ORN)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDstoreidx ptr idx w mem)
+       // match: (OR x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftLL x0 y [c])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if v_1.AuxInt != 4 {
-                       break
-               }
-               idx := v_1.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64SRLconst {
-                       break
-               }
-               if v_2.AuxInt != 32 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpARM64MOVWstoreidx {
-                       break
-               }
-               _ = x.Args[3]
-               if ptr != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64MOVDstoreidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstoreidx4_0(v *Value) bool {
-       // match: (MOVWstoreidx4 ptr (MOVDconst [c]) val mem)
-       // cond:
-       // result: (MOVWstore [c<<2] ptr val mem)
+       // match: (OR x1:(SLLconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftLL x0 y [c])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v.Args[1]
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               c := v_1.AuxInt
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = c << 2
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVWstorezeroidx4 ptr idx mem)
+       // match: (OR x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRL x0 y [c])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               if v_2.AuxInt != 0 {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx4 ptr idx (MOVWreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx4 ptr idx x mem)
+       // match: (OR x1:(SRLconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRL x0 y [c])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWreg {
+               _ = v.Args[1]
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWstoreidx4 ptr idx (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstoreidx4 ptr idx x mem)
+       // match: (OR x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRA x0 y [c])
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVWUreg {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               x := v_2.Args[0]
-               mem := v.Args[3]
-               v.reset(OpARM64MOVWstoreidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
+func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (OR x1:(SRAconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (ORshiftRA x0 y [c])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // match: (OR (SLL x (ANDconst <t> [63] y)) (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x (NEG <t> y))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64SLL {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezero [off] {sym} (ADD ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstorezeroidx ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 63 {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem)
-       // cond: off == 0 && sym == nil
-       // result: (MOVWstorezeroidx4 ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               if v_1.Type != typ.UInt64 {
                        break
                }
-               if v_0.AuxInt != 2 {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SRL {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(off == 0 && sym == nil) {
+               if v_1_0.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
-       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
-       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               ptr0 := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezero {
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
                        break
                }
-               j := x.AuxInt
-               if x.Aux != s {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               _ = x.Args[1]
-               ptr1 := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+               if v_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64MOVDstorezero)
-               v.AuxInt = min(i, j)
-               v.Aux = s
-               v.AddArg(ptr0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)
-       // result: (MOVDstorezeroidx ptr1 idx1 mem)
-       for {
-               if v.AuxInt != 4 {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADD {
+               if v_1_0_1_0.AuxInt != 64 {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezeroidx {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x)) {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg(ptr1)
-               v.AddArg(idx1)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
-       // cond: x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)
-       // result: (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
-       for {
-               if v.AuxInt != 4 {
+               if v_1_0_1_1.AuxInt != 63 {
                        break
                }
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDshiftLL {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               if v_0.AuxInt != 2 {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr0 := v_0.Args[0]
-               idx0 := v_0.Args[1]
-               x := v.Args[1]
-               if x.Op != OpARM64MOVWstorezeroidx4 {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               _ = x.Args[2]
-               ptr1 := x.Args[0]
-               idx1 := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && s == nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && clobber(x)) {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg(ptr1)
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, idx1.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx1)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezeroidx_0(v *Value) bool {
-       // match: (MOVWstorezeroidx ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVWstorezero [c] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1_0.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = c
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezeroidx (MOVDconst [c]) idx mem)
-       // cond:
-       // result: (MOVWstorezero [c] idx mem)
-       for {
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               idx := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = c
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezeroidx ptr (SLLconst [2] idx) mem)
-       // cond:
-       // result: (MOVWstorezeroidx4 ptr idx mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_1_1_0_0.AuxInt != 64 {
+                       break
+               }
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1_0_1.Type != t {
+                       break
+               }
+               if v_1_1_0_1.AuxInt != 63 {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if y != v_1_1_0_1.Args[0] {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstorezeroidx (SLLconst [2] idx) ptr mem)
-       // cond:
-       // result: (MOVWstorezeroidx4 ptr idx mem)
+       // match: (OR (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))) (SLL x (ANDconst <t> [63] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x (NEG <t> y))
        for {
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               if v_0.AuxInt != 2 {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezeroidx4)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVDstorezeroidx ptr idx mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64ADDconst {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SRL {
                        break
                }
-               if v_1.AuxInt != 4 {
+               if v_0_0.Type != typ.UInt64 {
                        break
                }
-               idx := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpARM64MOVWstorezeroidx {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               _ = x.Args[2]
-               if ptr != x.Args[0] {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               if idx != x.Args[1] {
+               if v_0_0_1_0.AuxInt != 64 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDstorezeroidx)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWstorezeroidx4_0(v *Value) bool {
-       // match: (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem)
-       // cond:
-       // result: (MOVWstorezero [c<<2] ptr mem)
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstorezero)
-               v.AuxInt = c << 2
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
-       // match: (MUL (NEG x) y)
-       // cond:
-       // result: (MNEG x y)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64NEG {
+               if v_0_0_1_1.AuxInt != 63 {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MUL y (NEG x))
-       // cond:
-       // result: (MNEG x y)
-       for {
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64NEG {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MUL x (MOVDconst [-1]))
-       // cond:
-       // result: (NEG x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               if v_1.AuxInt != -1 {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL (MOVDconst [-1]) x)
-       // cond:
-       // result: (NEG x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0.Type != t {
                        break
                }
-               if v_0.AuxInt != -1 {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL _ (MOVDconst [0]))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_1_0_0.AuxInt != 64 {
                        break
                }
-               if v_1.AuxInt != 0 {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MUL (MOVDconst [0]) _)
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               if v_0.AuxInt != 0 {
+               if v_0_1_0_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MUL x (MOVDconst [1]))
-       // cond:
-       // result: x
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SLL {
                        break
                }
-               if v_1.AuxInt != 1 {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL (MOVDconst [1]) x)
-       // cond:
-       // result: x
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               if v_1_1.Type != t {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1.AuxInt != 63 {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if y != v_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (OR (SRL <typ.UInt64> x (ANDconst <t> [63] y)) (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SRL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MUL_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 63 {
+                       break
+               }
+               y := v_0_1.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64CSEL0 {
+                       break
+               }
+               if v_1.Type != typ.UInt64 {
+                       break
+               }
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SLL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
+                       break
+               }
+               if v_1_0_1.Type != t {
+                       break
+               }
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1_0_1_0.AuxInt != 64 {
+                       break
+               }
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_0_1_1.Type != t {
+                       break
+               }
+               if v_1_0_1_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_1_0_1_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && c >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && c >= 3) {
+               if v_1_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if v_1_1_0_0.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1_0_1.Type != t {
+                       break
+               }
+               if v_1_1_0_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && c >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (OR (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))) (SRL <typ.UInt64> x (ANDconst <t> [63] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && c >= 7) {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SLL {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               if v_0_0_1_0.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_0_1_1.AuxInt != 63 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0.Type != t {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MUL_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MUL x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_1_0_0.AuxInt != 64 {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               if v_0_1_0_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MUL (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c*d])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SRL {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
-               return true
-       }
-       // match: (MUL (MOVDconst [d]) (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [c*d])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1.Type != typ.UInt64 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1.Type != t {
                        break
                }
-               d := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1.AuxInt != 63 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_0(v *Value) bool {
-       // match: (MULW (NEG x) y)
-       // cond:
-       // result: (MNEGW x y)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64NEG {
+               if y != v_1_1.Args[0] {
                        break
                }
-               x := v_0.Args[0]
-               y := v.Args[1]
-               v.reset(OpARM64MNEGW)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (MULW y (NEG x))
-       // cond:
-       // result: (MNEGW x y)
-       for {
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64NEG {
+               if !(cc.(Op) == OpARM64LessThanU) {
                        break
                }
-               x := v_1.Args[0]
-               v.reset(OpARM64MNEGW)
+               v.reset(OpARM64ROR)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // match: (OR (SLL x (ANDconst <t> [31] y)) (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x (NEG <t> y))
        for {
                _ = v.Args[1]
-               x := v.Args[0]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 31 {
+                       break
+               }
+               y := v_0_1.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == -1) {
+               if v_1.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: (NEG x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SRL {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == -1) {
+               if v_1_0.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW _ (MOVDconst [c]))
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 0) {
+               if x != v_1_0_0.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               if v_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==1
-       // result: x
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               if v_1_0_1_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: x
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(int32(c) == 1) {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_0_1_1.AuxInt != 31 {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               if v_1_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
-       // result: (ADDshiftLL x x [log2(c-1)])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
+               if v_1_1_0_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c - 1)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               if v_1_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               if v_1_1_0_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
-       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       // match: (OR (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))) (SLL x (ANDconst <t> [31] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x (NEG <t> y))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64ADDshiftLL)
-               v.AuxInt = log2(c + 1)
-               v0 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SRL {
+                       break
+               }
+               if v_0_0.Type != typ.UInt32 {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_0_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
+                       break
+               }
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0_0_1_0.AuxInt != 32 {
+                       break
+               }
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_0_0_1_1.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               if v_0_0_1_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
-       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 1
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               if v_0_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
-       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
+               if v_0_1_0_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 2
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
-       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0_1.AuxInt != 31 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 7)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v1 := b.NewValue0(v.Pos, OpARM64NEG, x.Type)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULW x (MOVDconst [c]))
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64SLL {
                        break
                }
-               c := v_1.AuxInt
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULW (MOVDconst [c]) x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
-       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+               if v_1_1.Type != t {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type)
-               v0.AuxInt = 3
-               v0.AddArg(x)
-               v0.AddArg(x)
+               if v_1_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (MULW (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
+       // match: (OR (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y)) (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64SRL {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
-               return true
-       }
-       // match: (MULW (MOVDconst [d]) (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [int64(int32(c)*int32(d))])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               d := v_0.AuxInt
+               x := v_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 31 {
+                       break
+               }
+               y := v_0_1.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(int32(c) * int32(d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64MVN_0(v *Value) bool {
-       // match: (MVN (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1.Type != typ.UInt32 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NEG_0(v *Value) bool {
-       // match: (NEG (MUL x y))
-       // cond:
-       // result: (MNEG x y)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MUL {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SLL {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64MNEG)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (NEG (MULW x y))
-       // cond:
-       // result: (MNEGW x y)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MULW {
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpARM64MNEGW)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (NEG (MOVDconst [c]))
-       // cond:
-       // result: (MOVDconst [-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
+                       break
+               }
+               if v_1_0_1.Type != t {
+                       break
+               }
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1_0_1_0.AuxInt != 32 {
+                       break
+               }
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_0_1_1.Type != t {
+                       break
+               }
+               if v_1_0_1_1.AuxInt != 31 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -c
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64NotEqual_0(v *Value) bool {
-       // match: (NotEqual (FlagEQ))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagEQ {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (NotEqual (FlagLT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_ULT {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagLT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagLT_UGT {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagGT_ULT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_ULT {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (FlagGT_UGT))
-       // cond:
-       // result: (MOVDconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_1_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (NotEqual (InvertFlags x))
-       // cond:
-       // result: (NotEqual x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpARM64NotEqual)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64OR_0(v *Value) bool {
-       // match: (OR x (MOVDconst [c]))
-       // cond:
-       // result: (ORconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1_0_0.AuxInt != 32 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR (MOVDconst [c]) x)
-       // cond:
-       // result: (ORconst [c] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR x x)
-       // cond:
-       // result: x
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if v_1_1_0_1.Type != t {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (OR x (MVN y))
-       // cond:
-       // result: (ORN x y)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MVN {
+               if v_1_1_0_1.AuxInt != 31 {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpARM64ORN)
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (OR (MVN y) x)
-       // cond:
-       // result: (ORN x y)
+       // match: (OR (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MVN {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64ORN)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SLL {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x1:(SLLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SLLconst {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64ORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               if v_0_0_1_0.AuxInt != 32 {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x1:(SRLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRLconst {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if v_0_0_1_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64ORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (OR x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR x1:(SRAconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (ORshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRAconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
+                       break
+               }
+               if v_0_1_0.Type != t {
+                       break
+               }
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_0_1_0_1.Type != t {
+                       break
+               }
+               if v_0_1_0_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_0_1_0_1.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRL {
+                       break
+               }
+               if v_1.Type != typ.UInt32 {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVWUreg {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if x != v_1_0.Args[0] {
                        break
                }
-               v.reset(OpARM64ORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               if v_1_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
+               v.AddArg(x)
                v.AddArg(y)
                return true
        }
@@ -17182,6 +19027,11 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       return false
+}
+func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (OR (ANDconst [ac] y) (UBFIZ [bfc] x))
        // cond: ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
        // result: (BFI [bfc] y x)
@@ -17848,11 +19698,6 @@ func rewriteValueARM64_OpARM64OR_10(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem))))
        // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
        // result: @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
@@ -18173,19 +20018,246 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if y7.Op != OpARM64MOVDnop {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               if x7.Aux != s {
+                       break
+               }
+               _ = x7.Args[1]
+               if p != x7.Args[0] {
+                       break
+               }
+               if mem != x7.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x7.AuxInt
+               s := x7.Aux
+               _ = x7.Args[1]
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 56 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x0.AuxInt
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
+                       break
+               }
+               if mem != x0.Args[1] {
+                       break
+               }
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i6 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i5 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o3.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i4 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
+                       break
+               }
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x4.AuxInt
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x5.AuxInt
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
+                       break
+               }
+               if mem != x5.Args[1] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i0 := x7.AuxInt
-               if x7.Aux != s {
+               i1 := x6.AuxInt
+               if x6.Aux != s {
                        break
                }
-               _ = x7.Args[1]
-               if p != x7.Args[0] {
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
                        break
                }
-               if mem != x7.Args[1] {
+               if mem != x6.Args[1] {
                        break
                }
                if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
@@ -18203,26 +20275,13 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
        for {
                t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := x7.AuxInt
-               s := x7.Aux
-               _ = x7.Args[1]
-               p := x7.Args[0]
-               mem := x7.Args[1]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -18285,17 +20344,13 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x0.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x0.AuxInt
-               if x0.Aux != s {
+               if x0.AuxInt != 7 {
                        break
                }
+               s := x0.Aux
                _ = x0.Args[1]
-               if p != x0.Args[0] {
-                       break
-               }
-               if mem != x0.Args[1] {
-                       break
-               }
+               p := x0.Args[0]
+               mem := x0.Args[1]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -18304,7 +20359,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x1.AuxInt
+               if x1.AuxInt != 6 {
+                       break
+               }
                if x1.Aux != s {
                        break
                }
@@ -18323,7 +20380,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x2.AuxInt
+               if x2.AuxInt != 5 {
+                       break
+               }
                if x2.Aux != s {
                        break
                }
@@ -18342,7 +20401,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x3.AuxInt
+               if x3.AuxInt != 4 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -18361,7 +20422,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x4.AuxInt
+               if x4.AuxInt != 3 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -18380,7 +20443,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x5.AuxInt
+               if x5.AuxInt != 2 {
+                       break
+               }
                if x5.Aux != s {
                        break
                }
@@ -18399,39 +20464,68 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x6.AuxInt
+               if x6.AuxInt != 1 {
+                       break
+               }
                if x6.Aux != s {
                        break
                }
                _ = x6.Args[1]
-               if p != x6.Args[0] {
+               p1 := x6.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x6.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr0 := x7.Args[0]
+               idx0 := x7.Args[1]
+               if mem != x7.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
-               v0.AddArg(v1)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
                v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
        // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
        // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
        for {
                t := v.Type
                _ = v.Args[1]
-               o0 := v.Args[0]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr0 := x7.Args[0]
+               idx0 := x7.Args[1]
+               mem := x7.Args[2]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -18500,7 +20594,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                s := x0.Aux
                _ = x0.Args[1]
                p := x0.Args[0]
-               mem := x0.Args[1]
+               if mem != x0.Args[1] {
+                       break
+               }
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -18631,20 +20727,6 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x6.Args[1] {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x7.Args[2]
-               ptr0 := x7.Args[0]
-               idx0 := x7.Args[1]
-               if mem != x7.Args[2] {
-                       break
-               }
                if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
@@ -18657,25 +20739,13 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [7] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [6] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [4] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [3] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [2] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
        for {
                t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x7.Args[2]
-               ptr0 := x7.Args[0]
-               idx0 := x7.Args[1]
-               mem := x7.Args[2]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -18735,37 +20805,43 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x0.AuxInt != 7 {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
                        break
                }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               if mem != x0.Args[1] {
+               if x0_1.AuxInt != 7 {
                        break
                }
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x1.AuxInt != 6 {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               if x1_1.AuxInt != 6 {
                        break
                }
-               if mem != x1.Args[1] {
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o4.Args[1]
@@ -18773,20 +20849,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x2.AuxInt != 5 {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2_1.AuxInt != 5 {
                        break
                }
-               if mem != x2.Args[1] {
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
                        break
                }
                y3 := o3.Args[1]
@@ -18794,20 +20874,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x3.AuxInt != 4 {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               if x3.Aux != s {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               if x3_1.AuxInt != 4 {
                        break
                }
-               if mem != x3.Args[1] {
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
                        break
                }
                y4 := o2.Args[1]
@@ -18815,87 +20899,125 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x4.AuxInt != 3 {
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x4_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x4_1.Args[0] {
+                       break
+               }
+               if mem != x4.Args[2] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
+                       break
+               }
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x5_1.AuxInt != 2 {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               if idx != x5_1.Args[0] {
                        break
                }
-               if mem != x4.Args[1] {
+               if mem != x5.Args[2] {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x5.AuxInt != 2 {
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
                        break
                }
-               if x5.Aux != s {
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               if x6_1.AuxInt != 1 {
                        break
                }
-               if mem != x5.Args[1] {
+               if idx != x6_1.Args[0] {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               if mem != x6.Args[2] {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               if x6.AuxInt != 1 {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x6.Aux != s {
+               _ = x7.Args[2]
+               if ptr != x7.Args[0] {
                        break
                }
-               _ = x6.Args[1]
-               p1 := x6.Args[0]
-               if p1.Op != OpARM64ADD {
+               if idx != x7.Args[1] {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x6.Args[1] {
+               if mem != x7.Args[2] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
                v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
                v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem))))
        // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
        // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
        for {
                t := v.Type
                _ = v.Args[1]
-               o0 := v.Args[0]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr := x7.Args[0]
+               idx := x7.Args[1]
+               mem := x7.Args[2]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -18959,7 +21081,9 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = x0.Args[2]
-               ptr := x0.Args[0]
+               if ptr != x0.Args[0] {
+                       break
+               }
                x0_1 := x0.Args[1]
                if x0_1.Op != OpARM64ADDconst {
                        break
@@ -18967,8 +21091,12 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x0_1.AuxInt != 7 {
                        break
                }
-               idx := x0_1.Args[0]
-               mem := x0.Args[2]
+               if idx != x0_1.Args[0] {
+                       break
+               }
+               if mem != x0.Args[2] {
+                       break
+               }
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -19060,114 +21188,316 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x4_1.Op != OpARM64ADDconst {
                        break
                }
-               if x4_1.AuxInt != 3 {
+               if x4_1.AuxInt != 3 {
+                       break
+               }
+               if idx != x4_1.Args[0] {
+                       break
+               }
+               if mem != x4.Args[2] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
+                       break
+               }
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x5_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x5_1.Args[0] {
+                       break
+               }
+               if mem != x5.Args[2] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
+                       break
+               }
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x6_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x6_1.Args[0] {
+                       break
+               }
+               if mem != x6.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o0.AuxInt != 8 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 16 {
+                       break
+               }
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
+                       break
+               }
+               if s0.AuxInt != 24 {
+                       break
+               }
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
+                       break
+               }
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
+                       break
+               }
+               i0 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
+                       break
+               }
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               i1 := x1.AuxInt
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               i2 := x2.AuxInt
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               i3 := x3.AuxInt
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
+                       break
+               }
+               if mem != x3.Args[1] {
+                       break
+               }
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if idx != x4_1.Args[0] {
+               i3 := x3.AuxInt
+               s := x3.Aux
+               _ = x3.Args[1]
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if mem != x4.Args[2] {
+               if o0.AuxInt != 8 {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUloadidx {
+               if o1.AuxInt != 16 {
                        break
                }
-               _ = x5.Args[2]
-               if ptr != x5.Args[0] {
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpARM64ADDconst {
+               if s0.AuxInt != 24 {
                        break
                }
-               if x5_1.AuxInt != 2 {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if idx != x5_1.Args[0] {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               if mem != x5.Args[2] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUloadidx {
+               if mem != x0.Args[1] {
                        break
                }
-               _ = x6.Args[2]
-               if ptr != x6.Args[0] {
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpARM64ADDconst {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x6_1.AuxInt != 1 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x6_1.Args[0] {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x6.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x7.Args[2]
-               if ptr != x7.Args[0] {
+               i2 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               if idx != x7.Args[1] {
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x2.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x7.Args[2]
-               ptr := x7.Args[0]
-               idx := x7.Args[1]
-               mem := x7.Args[2]
-               o0 := v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -19183,43 +21513,11 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o2.AuxInt != 24 {
-                       break
-               }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o3.AuxInt != 32 {
-                       break
-               }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o4.AuxInt != 40 {
-                       break
-               }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o5.AuxInt != 48 {
-                       break
-               }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
+               s0 := o1.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 56 {
+               if s0.AuxInt != 24 {
                        break
                }
                y0 := s0.Args[0]
@@ -19231,187 +21529,207 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = x0.Args[2]
-               if ptr != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x0_1.AuxInt != 7 {
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if idx != x0_1.Args[0] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if mem != x0.Args[2] {
+               if x1.AuxInt != 1 {
                        break
                }
-               y1 := o5.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x1_1.AuxInt != 6 {
+               if x2.AuxInt != 2 {
                        break
                }
-               if idx != x1_1.Args[0] {
+               if x2.Aux != s {
                        break
                }
-               if mem != x1.Args[2] {
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
                        break
                }
-               y2 := o4.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               y3 := v.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               if x3.AuxInt != 3 {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
+               if x3.Aux != s {
                        break
                }
-               if x2_1.AuxInt != 5 {
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
                        break
                }
-               if idx != x2_1.Args[0] {
+               if mem != x3.Args[1] {
                        break
                }
-               if mem != x2.Args[2] {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               y3 := o3.Args[1]
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               y3 := v.Args[0]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
-                       break
-               }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x3_1.AuxInt != 4 {
-                       break
-               }
-               if idx != x3_1.Args[0] {
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if mem != x3.Args[2] {
+               if x3.AuxInt != 3 {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               s := x3.Aux
+               _ = x3.Args[1]
+               p := x3.Args[0]
+               mem := x3.Args[1]
+               o0 := v.Args[1]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUloadidx {
+               if o0.AuxInt != 8 {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64ADDconst {
+               if o1.AuxInt != 16 {
                        break
                }
-               if x4_1.AuxInt != 3 {
+               _ = o1.Args[1]
+               s0 := o1.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               if idx != x4_1.Args[0] {
+               if s0.AuxInt != 24 {
                        break
                }
-               if mem != x4.Args[2] {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUloadidx {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               if mem != x0.Args[2] {
                        break
                }
-               _ = x5.Args[2]
-               if ptr != x5.Args[0] {
+               y1 := o1.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpARM64ADDconst {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x5_1.AuxInt != 2 {
+               if x1.AuxInt != 1 {
                        break
                }
-               if idx != x5_1.Args[0] {
+               if x1.Aux != s {
                        break
                }
-               if mem != x5.Args[2] {
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUloadidx {
+               y2 := o0.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               _ = x6.Args[2]
-               if ptr != x6.Args[0] {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpARM64ADDconst {
+               if x2.AuxInt != 2 {
                        break
                }
-               if x6_1.AuxInt != 1 {
+               if x2.Aux != s {
                        break
                }
-               if idx != x6_1.Args[0] {
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
                        break
                }
-               if mem != x6.Args[2] {
+               if mem != x2.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               b = mergePoint(b, x0, x1, x2, x3)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -19443,31 +21761,36 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
                y1 := o1.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x1.Args[1] {
+               if x1_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o0.Args[1]
@@ -19475,18 +21798,24 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x2.Args[1] {
+               if x2_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
                        break
                }
                y3 := v.Args[1]
@@ -19494,40 +21823,48 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i3 := x3.AuxInt
-               if x3.Aux != s {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x3.Args[1] {
+               if x3_1.AuxInt != 3 {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3)
                v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       return false
+}
+func rewriteValueARM64_OpARM64OR_40(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -19536,14 +21873,20 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i3 := x3.AuxInt
-               s := x3.Aux
-               _ = x3.Args[1]
-               p := x3.Args[0]
-               mem := x3.Args[1]
+               _ = x3.Args[2]
+               ptr := x3.Args[0]
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x3_1.AuxInt != 3 {
+                       break
+               }
+               idx := x3_1.Args[0]
+               mem := x3.Args[2]
                o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
@@ -19572,18 +21915,17 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
                        break
                }
-               _ = x0.Args[1]
-               if p != x0.Args[0] {
+               if idx != x0.Args[1] {
                        break
                }
-               if mem != x0.Args[1] {
+               if mem != x0.Args[2] {
                        break
                }
                y1 := o1.Args[1]
@@ -19591,59 +21933,68 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x1.Args[1] {
+               if x1_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o0.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               if x2_1.AuxInt != 2 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if idx != x2_1.Args[0] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x2.Args[2] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3)
                v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -19663,11 +22014,43 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               s0 := o1.Args[0]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 24 {
+                       break
+               }
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o3.AuxInt != 32 {
+                       break
+               }
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o4.AuxInt != 40 {
+                       break
+               }
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o5.AuxInt != 48 {
+                       break
+               }
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
                if s0.Op != OpARM64SLLconst {
                        break
                }
-               if s0.AuxInt != 24 {
+               if s0.AuxInt != 56 {
                        break
                }
                y0 := s0.Args[0]
@@ -19675,14 +22058,15 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               mem := x0.Args[2]
-               y1 := o1.Args[1]
+               i0 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -19690,22 +22074,18 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 1 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               s := x1.Aux
                _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               if p != x1.Args[0] {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
-               y2 := o0.Args[1]
+               y2 := o4.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -19713,18 +22093,18 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 2 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
                _ = x2.Args[1]
-               p := x2.Args[0]
+               if p != x2.Args[0] {
+                       break
+               }
                if mem != x2.Args[1] {
                        break
                }
-               y3 := v.Args[1]
+               y3 := o3.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
@@ -19732,9 +22112,7 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != 3 {
-                       break
-               }
+               i3 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -19745,150 +22123,119 @@ func rewriteValueARM64_OpARM64OR_20(v *Value) bool {
                if mem != x3.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
-                       break
-               }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               y3 := v.Args[0]
-               if y3.Op != OpARM64MOVDnop {
-                       break
-               }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x3.AuxInt != 3 {
-                       break
-               }
-               s := x3.Aux
-               _ = x3.Args[1]
-               p := x3.Args[0]
-               mem := x3.Args[1]
-               o0 := v.Args[1]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if o1.AuxInt != 16 {
+               i4 := x4.AuxInt
+               if x4.Aux != s {
                        break
                }
-               _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               if s0.AuxInt != 24 {
+               if mem != x4.Args[1] {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               if mem != x0.Args[2] {
+               i5 := x5.AuxInt
+               if x5.Aux != s {
                        break
                }
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if mem != x5.Args[1] {
                        break
                }
-               if x1.AuxInt != 1 {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               if x1.Aux != s {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               i6 := x6.AuxInt
+               if x6.Aux != s {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               if mem != x6.Args[1] {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               y7 := v.Args[1]
+               if y7.Op != OpARM64MOVDnop {
                        break
                }
-               if x2.AuxInt != 2 {
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.Aux != s {
+               i7 := x7.AuxInt
+               if x7.Aux != s {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               _ = x7.Args[1]
+               if p != x7.Args[0] {
                        break
                }
-               if mem != x2.Args[1] {
+               if mem != x7.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                _ = v.Args[1]
-               o0 := v.Args[0]
+               y7 := v.Args[0]
+               if y7.Op != OpARM64MOVDnop {
+                       break
+               }
+               x7 := y7.Args[0]
+               if x7.Op != OpARM64MOVBUload {
+                       break
+               }
+               i7 := x7.AuxInt
+               s := x7.Aux
+               _ = x7.Args[1]
+               p := x7.Args[0]
+               mem := x7.Args[1]
+               o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
@@ -19904,247 +22251,198 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
-                       break
-               }
-               if s0.AuxInt != 24 {
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               if o2.AuxInt != 24 {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               _ = o2.Args[1]
+               o3 := o2.Args[0]
+               if o3.Op != OpARM64ORshiftLL {
                        break
                }
-               _ = x0.Args[2]
-               ptr := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               if o3.AuxInt != 32 {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               _ = o3.Args[1]
+               o4 := o3.Args[0]
+               if o4.Op != OpARM64ORshiftLL {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               if o4.AuxInt != 40 {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               _ = o4.Args[1]
+               o5 := o4.Args[0]
+               if o5.Op != OpARM64ORshiftLL {
                        break
                }
-               if x1_1.AuxInt != 1 {
+               if o5.AuxInt != 48 {
                        break
                }
-               if idx != x1_1.Args[0] {
+               _ = o5.Args[1]
+               s0 := o5.Args[0]
+               if s0.Op != OpARM64SLLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if s0.AuxInt != 56 {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               y0 := s0.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               i0 := x0.AuxInt
+               if x0.Aux != s {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
+               _ = x0.Args[1]
+               if p != x0.Args[0] {
                        break
                }
-               if x2_1.AuxInt != 2 {
+               if mem != x0.Args[1] {
                        break
                }
-               if idx != x2_1.Args[0] {
+               y1 := o5.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x2.Args[2] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               y3 := v.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               if mem != x1.Args[1] {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst {
+               y2 := o4.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               if x3_1.AuxInt != 3 {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if idx != x3_1.Args[0] {
+               i2 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               if mem != x3.Args[2] {
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if mem != x2.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg(ptr)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       // match: (OR <t> y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               y3 := v.Args[0]
+               y3 := o3.Args[1]
                if y3.Op != OpARM64MOVDnop {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x3.Args[2]
-               ptr := x3.Args[0]
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x3_1.AuxInt != 3 {
-                       break
-               }
-               idx := x3_1.Args[0]
-               mem := x3.Args[2]
-               o0 := v.Args[1]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 16 {
-                       break
-               }
-               _ = o1.Args[1]
-               s0 := o1.Args[0]
-               if s0.Op != OpARM64SLLconst {
-                       break
-               }
-               if s0.AuxInt != 24 {
-                       break
-               }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               i3 := x3.AuxInt
+               if x3.Aux != s {
                        break
                }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
                        break
                }
-               if idx != x0.Args[1] {
+               if mem != x3.Args[1] {
                        break
                }
-               if mem != x0.Args[2] {
+               y4 := o2.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               y1 := o1.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               i4 := x4.AuxInt
+               if x4.Aux != s {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               if mem != x4.Args[1] {
                        break
                }
-               if x1_1.AuxInt != 1 {
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
                        break
                }
-               if idx != x1_1.Args[0] {
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUload {
                        break
                }
-               if mem != x1.Args[2] {
+               i5 := x5.AuxInt
+               if x5.Aux != s {
                        break
                }
-               y2 := o0.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               _ = x5.Args[1]
+               if p != x5.Args[0] {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               if mem != x5.Args[1] {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUload {
                        break
                }
-               if x2_1.AuxInt != 2 {
+               i6 := x6.AuxInt
+               if x6.Aux != s {
                        break
                }
-               if idx != x2_1.Args[0] {
+               _ = x6.Args[1]
+               if p != x6.Args[0] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x6.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(o0) && clobber(o1) && clobber(s0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg(ptr)
-               v1.AddArg(idx)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -20208,14 +22506,13 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -20224,14 +22521,18 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x1.AuxInt != 1 {
                        break
                }
+               s := x1.Aux
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -20243,14 +22544,14 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               if x2.AuxInt != 2 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2.Aux != s {
                        break
                }
+               _ = x2.Args[1]
+               p := x2.Args[0]
                if mem != x2.Args[1] {
                        break
                }
@@ -20262,7 +22563,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x3.AuxInt
+               if x3.AuxInt != 3 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -20281,7 +22584,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x4.AuxInt
+               if x4.AuxInt != 4 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -20300,7 +22605,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x5.AuxInt
+               if x5.AuxInt != 5 {
+                       break
+               }
                if x5.Aux != s {
                        break
                }
@@ -20319,7 +22626,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x6.AuxInt
+               if x6.AuxInt != 6 {
+                       break
+               }
                if x6.Aux != s {
                        break
                }
@@ -20338,7 +22647,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x7.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x7.AuxInt
+               if x7.AuxInt != 7 {
+                       break
+               }
                if x7.Aux != s {
                        break
                }
@@ -20349,26 +22660,23 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if mem != x7.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
                v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem))))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -20380,7 +22688,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x7.Op != OpARM64MOVBUload {
                        break
                }
-               i7 := x7.AuxInt
+               if x7.AuxInt != 7 {
+                       break
+               }
                s := x7.Aux
                _ = x7.Args[1]
                p := x7.Args[0]
@@ -20445,18 +22755,13 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i0 := x0.AuxInt
-               if x0.Aux != s {
-                       break
-               }
-               _ = x0.Args[1]
-               if p != x0.Args[0] {
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if mem != x0.Args[1] {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               if mem != x0.Args[2] {
                        break
                }
                y1 := o5.Args[1]
@@ -20467,14 +22772,20 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
+               if x1.AuxInt != 1 {
+                       break
+               }
                if x1.Aux != s {
                        break
                }
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -20486,7 +22797,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               i2 := x2.AuxInt
+               if x2.AuxInt != 2 {
+                       break
+               }
                if x2.Aux != s {
                        break
                }
@@ -20505,7 +22818,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               i3 := x3.AuxInt
+               if x3.AuxInt != 3 {
+                       break
+               }
                if x3.Aux != s {
                        break
                }
@@ -20524,7 +22839,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               i4 := x4.AuxInt
+               if x4.AuxInt != 4 {
+                       break
+               }
                if x4.Aux != s {
                        break
                }
@@ -20543,7 +22860,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x5.Op != OpARM64MOVBUload {
                        break
                }
-               i5 := x5.AuxInt
+               if x5.AuxInt != 5 {
+                       break
+               }
                if x5.Aux != s {
                        break
                }
@@ -20562,7 +22881,9 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x6.Op != OpARM64MOVBUload {
                        break
                }
-               i6 := x6.AuxInt
+               if x6.AuxInt != 6 {
+                       break
+               }
                if x6.Aux != s {
                        break
                }
@@ -20573,26 +22894,23 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if mem != x6.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
                v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))) y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -20660,30 +22978,32 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
                mem := x0.Args[2]
                y1 := o5.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x1.AuxInt != 1 {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               s := x1.Aux
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               if x1_1.AuxInt != 1 {
+                       break
+               }
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o4.Args[1]
@@ -20691,18 +23011,24 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x2.AuxInt != 2 {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x2.Args[1]
-               p := x2.Args[0]
-               if mem != x2.Args[1] {
+               if x2_1.AuxInt != 2 {
+                       break
+               }
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
                        break
                }
                y3 := o3.Args[1]
@@ -20710,20 +23036,24 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x3.AuxInt != 3 {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               if x3.Aux != s {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               if x3_1.AuxInt != 3 {
                        break
                }
-               if mem != x3.Args[1] {
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
                        break
                }
                y4 := o2.Args[1]
@@ -20731,20 +23061,24 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x4.AuxInt != 4 {
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               if x4_1.AuxInt != 4 {
                        break
                }
-               if mem != x4.Args[1] {
+               if idx != x4_1.Args[0] {
+                       break
+               }
+               if mem != x4.Args[2] {
                        break
                }
                y5 := o1.Args[1]
@@ -20752,20 +23086,24 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               if x5.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x5.AuxInt != 5 {
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
                        break
                }
-               if x5.Aux != s {
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               if x5_1.AuxInt != 5 {
                        break
                }
-               if mem != x5.Args[1] {
+               if idx != x5_1.Args[0] {
+                       break
+               }
+               if mem != x5.Args[2] {
                        break
                }
                y6 := o0.Args[1]
@@ -20773,20 +23111,24 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               if x6.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x6.AuxInt != 6 {
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
                        break
                }
-               if x6.Aux != s {
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               if x6_1.AuxInt != 6 {
                        break
                }
-               if mem != x6.Args[1] {
+               if idx != x6_1.Args[0] {
+                       break
+               }
+               if mem != x6.Args[2] {
                        break
                }
                y7 := v.Args[1]
@@ -20794,23 +23136,27 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
+               if x7.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x7.AuxInt != 7 {
+               _ = x7.Args[2]
+               if ptr != x7.Args[0] {
                        break
                }
-               if x7.Aux != s {
+               x7_1 := x7.Args[1]
+               if x7_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x7.Args[1]
-               if p != x7.Args[0] {
+               if x7_1.AuxInt != 7 {
                        break
                }
-               if mem != x7.Args[1] {
+               if idx != x7_1.Args[0] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if mem != x7.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
@@ -20818,15 +23164,15 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                v.reset(OpCopy)
                v.AddArg(v0)
                v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
                v1.AddArg(mem)
                v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [3] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [4] {s} p mem))) y5:(MOVDnop x5:(MOVBUload [5] {s} p mem))) y6:(MOVDnop x6:(MOVBUload [6] {s} p mem))))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
        for {
                t := v.Type
                _ = v.Args[1]
@@ -20835,16 +23181,20 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUload {
+               if x7.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x7.Args[2]
+               ptr := x7.Args[0]
+               x7_1 := x7.Args[1]
+               if x7_1.Op != OpARM64ADDconst {
                        break
                }
-               if x7.AuxInt != 7 {
+               if x7_1.AuxInt != 7 {
                        break
                }
-               s := x7.Aux
-               _ = x7.Args[1]
-               p := x7.Args[0]
-               mem := x7.Args[1]
+               idx := x7_1.Args[0]
+               mem := x7.Args[2]
                o0 := v.Args[1]
                if o0.Op != OpARM64ORshiftLL {
                        break
@@ -20909,8 +23259,12 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
                if mem != x0.Args[2] {
                        break
                }
@@ -20919,24 +23273,24 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x1.AuxInt != 1 {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               if x1.Aux != s {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               if idx != x1_1.Args[0] {
+                       break
+               }
+               if mem != x1.Args[2] {
                        break
                }
                y2 := o4.Args[1]
@@ -20944,20 +23298,24 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x2.AuxInt != 2 {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if x2.Aux != s {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x2_1.AuxInt != 2 {
                        break
                }
-               if mem != x2.Args[1] {
+               if idx != x2_1.Args[0] {
+                       break
+               }
+               if mem != x2.Args[2] {
                        break
                }
                y3 := o3.Args[1]
@@ -20965,460 +23323,955 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x3.AuxInt != 3 {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               if x3.Aux != s {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               if x3_1.AuxInt != 3 {
                        break
                }
-               if mem != x3.Args[1] {
+               if idx != x3_1.Args[0] {
+                       break
+               }
+               if mem != x3.Args[2] {
                        break
                }
                y4 := o2.Args[1]
                if y4.Op != OpARM64MOVDnop {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
+                       break
+               }
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x4_1.AuxInt != 4 {
+                       break
+               }
+               if idx != x4_1.Args[0] {
+                       break
+               }
+               if mem != x4.Args[2] {
+                       break
+               }
+               y5 := o1.Args[1]
+               if y5.Op != OpARM64MOVDnop {
+                       break
+               }
+               x5 := y5.Args[0]
+               if x5.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
+                       break
+               }
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x5_1.AuxInt != 5 {
+                       break
+               }
+               if idx != x5_1.Args[0] {
+                       break
+               }
+               if mem != x5.Args[2] {
+                       break
+               }
+               y6 := o0.Args[1]
+               if y6.Op != OpARM64MOVDnop {
+                       break
+               }
+               x6 := y6.Args[0]
+               if x6.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
+                       break
+               }
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x6_1.AuxInt != 6 {
+                       break
+               }
+               if idx != x6_1.Args[0] {
+                       break
+               }
+               if mem != x6.Args[2] {
+                       break
+               }
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORN_0(v *Value) bool {
+       // match: (ORN x (MOVDconst [c]))
+       // cond:
+       // result: (ORconst [^c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORN x x)
+       // cond:
+       // result: (MOVDconst [-1])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORN x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftLL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORNshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORN x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftRL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORNshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORN x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (ORNshiftRA x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64ORNshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftLL_0(v *Value) bool {
+       // match: (ORNshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [^int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftLL x (SLLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               if x4.AuxInt != 4 {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x4.Aux != s {
+               if !(c == d) {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftRA_0(v *Value) bool {
+       // match: (ORNshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [^(c>>uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if mem != x4.Args[1] {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^(c >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftRA x (SRAconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUload {
+               if !(c == d) {
                        break
                }
-               if x5.AuxInt != 5 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORNshiftRL_0(v *Value) bool {
+       // match: (ORNshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [^int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x5.Aux != s {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = ^int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORNshiftRL x (SRLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [-1])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               _ = x5.Args[1]
-               if p != x5.Args[0] {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if mem != x5.Args[1] {
+               if !(c == d) {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
+       // match: (ORconst [0] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUload {
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORconst [-1] _)
+       // cond:
+       // result: (MOVDconst [-1])
+       for {
+               if v.AuxInt != -1 {
                        break
                }
-               if x6.AuxInt != 6 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [c|d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x6.Aux != s {
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c | d
+               return true
+       }
+       // match: (ORconst [c] (ORconst [d] x))
+       // cond:
+       // result: (ORconst [c|d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ORconst {
                        break
                }
-               _ = x6.Args[1]
-               if p != x6.Args[0] {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c | d
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORconst [c1] (ANDconst [c2] x))
+       // cond: c2|c1 == ^0
+       // result: (ORconst [c1] x)
+       for {
+               c1 := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               if mem != x6.Args[1] {
+               c2 := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c2|c1 == ^0) {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c1
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftLL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ORconst [c] (SLLconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-               v.reset(OpCopy)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
                return true
        }
-       // match: (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
+       // match: (ORshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [int64(uint64(c)<<uint64(d))])
        for {
-               t := v.Type
+               d := v.AuxInt
                _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 8 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 16 {
-                       break
-               }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if o2.AuxInt != 24 {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL x y:(SLLconst x [c]) [d])
+       // cond: c==d
+       // result: y
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SLLconst {
                        break
                }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
+               c := y.AuxInt
+               if x != y.Args[0] {
                        break
                }
-               if o3.AuxInt != 32 {
+               if !(c == d) {
                        break
                }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORshiftLL [c] (SRLconst x [64-c]) x)
+       // cond:
+       // result: (RORconst [64-c] x)
+       for {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               if o4.AuxInt != 40 {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
+               x := v_0.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               if o5.AuxInt != 48 {
+               v.reset(OpARM64RORconst)
+               v.AuxInt = 64 - c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x)
+       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
+       // result: (RORWconst [32-c] x)
+       for {
+               t := v.Type
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               if s0.AuxInt != 56 {
+               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               v.reset(OpARM64RORWconst)
+               v.AuxInt = 32 - c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [c] (SRLconst x [64-c]) x2)
+       // cond:
+       // result: (EXTRconst [64-c] x2 x)
+       for {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               _ = x0.Args[2]
-               ptr := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               y1 := o5.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               x := v_0.Args[0]
+               x2 := v.Args[1]
+               v.reset(OpARM64EXTRconst)
+               v.AuxInt = 64 - c
+               v.AddArg(x2)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x2)
+       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
+       // result: (EXTRWconst [32-c] x2 x)
+       for {
+               t := v.Type
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               x2 := v.Args[1]
+               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               v.reset(OpARM64EXTRWconst)
+               v.AuxInt = 32 - c
+               v.AddArg(x2)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y))
+       // cond: sc == getARM64BFwidth(bfc)
+       // result: (BFXIL [bfc] y x)
+       for {
+               sc := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               if x1_1.AuxInt != 1 {
+               if v_1.AuxInt != sc {
                        break
                }
-               if idx != x1_1.Args[0] {
+               y := v_1.Args[0]
+               if !(sc == getARM64BFwidth(bfc)) {
                        break
                }
-               if mem != x1.Args[2] {
+               v.reset(OpARM64BFXIL)
+               v.AuxInt = bfc
+               v.AddArg(y)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
+       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               y2 := o4.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x2_1.AuxInt != 2 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if idx != x2_1.Args[0] {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x1.Args[1] {
                        break
                }
-               y3 := o3.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x3_1.AuxInt != 3 {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if idx != x3_1.Args[0] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if mem != x3.Args[2] {
+               if x1.AuxInt != 1 {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUloadidx {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64ADDconst {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               if x4_1.AuxInt != 4 {
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if idx != x4_1.Args[0] {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if mem != x4.Args[2] {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUloadidx {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               _ = x5.Args[2]
-               if ptr != x5.Args[0] {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpARM64ADDconst {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               if x5_1.AuxInt != 5 {
+               if idx != x1_1.Args[0] {
                        break
                }
-               if idx != x5_1.Args[0] {
+               if mem != x1.Args[2] {
                        break
                }
-               if mem != x5.Args[2] {
+               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
+       // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 24 {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUloadidx {
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               _ = x6.Args[2]
-               if ptr != x6.Args[0] {
+               if o0.AuxInt != 16 {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpARM64ADDconst {
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUload {
                        break
                }
-               if x6_1.AuxInt != 6 {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if idx != x6_1.Args[0] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if mem != x6.Args[2] {
+               i2 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               y7 := v.Args[1]
-               if y7.Op != OpARM64MOVDnop {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
+               if mem != x1.Args[1] {
                        break
                }
-               _ = x7.Args[2]
-               if ptr != x7.Args[0] {
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               x7_1 := x7.Args[1]
-               if x7_1.Op != OpARM64ADDconst {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x7_1.AuxInt != 7 {
+               i3 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               if idx != x7_1.Args[0] {
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
                        break
                }
-               if mem != x7.Args[2] {
+               if mem != x2.Args[1] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
                v0.AddArg(v1)
+               v0.AddArg(mem)
                return true
        }
-       // match: (OR <t> y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)) o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
        for {
                t := v.Type
-               _ = v.Args[1]
-               y7 := v.Args[0]
-               if y7.Op != OpARM64MOVDnop {
-                       break
-               }
-               x7 := y7.Args[0]
-               if x7.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x7.Args[2]
-               ptr := x7.Args[0]
-               x7_1 := x7.Args[1]
-               if x7_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x7_1.AuxInt != 7 {
+               if v.AuxInt != 24 {
                        break
                }
-               idx := x7_1.Args[0]
-               mem := x7.Args[2]
-               o0 := v.Args[1]
+               _ = v.Args[1]
+               o0 := v.Args[0]
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if o0.AuxInt != 8 {
+               if o0.AuxInt != 16 {
                        break
                }
                _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 16 {
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               if o2.AuxInt != 24 {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               _ = o2.Args[1]
-               o3 := o2.Args[0]
-               if o3.Op != OpARM64ORshiftLL {
+               if x1.AuxInt != 2 {
                        break
                }
-               if o3.AuxInt != 32 {
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
-               _ = o3.Args[1]
-               o4 := o3.Args[0]
-               if o4.Op != OpARM64ORshiftLL {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               if o4.AuxInt != 40 {
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               _ = o4.Args[1]
-               o5 := o4.Args[0]
-               if o5.Op != OpARM64ORshiftLL {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if o5.AuxInt != 48 {
+               if x2.AuxInt != 3 {
                        break
                }
-               _ = o5.Args[1]
-               s0 := o5.Args[0]
-               if s0.Op != OpARM64SLLconst {
+               if x2.Aux != s {
                        break
                }
-               if s0.AuxInt != 56 {
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
                        break
                }
-               y0 := s0.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 24 {
                        break
                }
-               _ = x0.Args[2]
-               if ptr != x0.Args[0] {
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if idx != x0.Args[1] {
+               if o0.AuxInt != 16 {
                        break
                }
-               if mem != x0.Args[2] {
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx {
                        break
                }
-               y1 := o5.Args[1]
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               idx := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -21434,7 +24287,7 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if x1_1.AuxInt != 1 {
+               if x1_1.AuxInt != 2 {
                        break
                }
                if idx != x1_1.Args[0] {
@@ -21443,7 +24296,7 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               y2 := o4.Args[1]
+               y2 := v.Args[1]
                if y2.Op != OpARM64MOVDnop {
                        break
                }
@@ -21456,636 +24309,427 @@ func rewriteValueARM64_OpARM64OR_30(v *Value) bool {
                        break
                }
                x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x2_1.AuxInt != 2 {
-                       break
-               }
-               if idx != x2_1.Args[0] {
-                       break
-               }
-               if mem != x2.Args[2] {
-                       break
-               }
-               y3 := o3.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
+               if x2_1.AuxInt != 3 {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               if idx != x2_1.Args[0] {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst {
+               if mem != x2.Args[2] {
                        break
                }
-               if x3_1.AuxInt != 3 {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
-               if idx != x3_1.Args[0] {
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx2 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 24 {
                        break
                }
-               if mem != x3.Args[2] {
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               y4 := o2.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               if o0.AuxInt != 16 {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUloadidx {
+               _ = o0.Args[1]
+               x0 := o0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx2 {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64ADDconst {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x4_1.AuxInt != 4 {
+               if x1.AuxInt != 2 {
                        break
                }
-               if idx != x4_1.Args[0] {
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADDshiftLL {
                        break
                }
-               if mem != x4.Args[2] {
+               if p1.AuxInt != 1 {
                        break
                }
-               y5 := o1.Args[1]
-               if y5.Op != OpARM64MOVDnop {
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               x5 := y5.Args[0]
-               if x5.Op != OpARM64MOVBUloadidx {
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               _ = x5.Args[2]
-               if ptr != x5.Args[0] {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpARM64ADDconst {
+               if x2.AuxInt != 3 {
                        break
                }
-               if x5_1.AuxInt != 5 {
+               if x2.Aux != s {
                        break
                }
-               if idx != x5_1.Args[0] {
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
                        break
                }
-               if mem != x5.Args[2] {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
-               y6 := o0.Args[1]
-               if y6.Op != OpARM64MOVDnop {
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, idx0.Type)
+               v1.AuxInt = 1
+               v1.AddArg(idx0)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
+       // cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       for {
+               t := v.Type
+               if v.AuxInt != 56 {
                        break
                }
-               x6 := y6.Args[0]
-               if x6.Op != OpARM64MOVBUloadidx {
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               _ = x6.Args[2]
-               if ptr != x6.Args[0] {
+               if o0.AuxInt != 48 {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpARM64ADDconst {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               if x6_1.AuxInt != 6 {
+               if o1.AuxInt != 40 {
                        break
                }
-               if idx != x6_1.Args[0] {
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
                        break
                }
-               if mem != x6.Args[2] {
+               if o2.AuxInt != 32 {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o4) && clobber(o5) && clobber(s0)) {
+               _ = o2.Args[1]
+               x0 := o2.Args[0]
+               if x0.Op != OpARM64MOVWUload {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4, x5, x6, x7)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORN_0(v *Value) bool {
-       // match: (ORN x (MOVDconst [c]))
-       // cond:
-       // result: (ORconst [^c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               i0 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := o2.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORN x x)
-       // cond:
-       // result: (MOVDconst [-1])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       // match: (ORN x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               i4 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               v.reset(OpARM64ORNshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (ORN x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               if mem != x1.Args[1] {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               y2 := o1.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64ORNshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (ORN x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (ORNshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               i5 := x2.AuxInt
+               if x2.Aux != s {
                        break
                }
-               v.reset(OpARM64ORNshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftLL_0(v *Value) bool {
-       // match: (ORNshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [^int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = x2.Args[1]
+               if p != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftLL x (SLLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if !(c == d) {
+               i6 := x3.AuxInt
+               if x3.Aux != s {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftRA_0(v *Value) bool {
-       // match: (ORNshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [^(c>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^(c >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORNshiftRA x (SRAconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRAconst {
+               if mem != x3.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               if !(c == d) {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORNshiftRL_0(v *Value) bool {
-       // match: (ORNshiftRL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [^int64(uint64(c)>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               i7 := x4.AuxInt
+               if x4.Aux != s {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = ^int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
+                       break
+               }
+               if mem != x4.Args[1] {
+                       break
+               }
+               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.Aux = s
+               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v1.AuxInt = i0
+               v1.AddArg(p)
+               v0.AddArg(v1)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORNshiftRL x (SRLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [-1])
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
        for {
-               d := v.AuxInt
+               t := v.Type
+               if v.AuxInt != 56 {
+                       break
+               }
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if o0.AuxInt != 48 {
                        break
                }
-               if !(c == d) {
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
-       // match: (ORconst [0] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               if o1.AuxInt != 40 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORconst [-1] _)
-       // cond:
-       // result: (MOVDconst [-1])
-       for {
-               if v.AuxInt != -1 {
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
-               return true
-       }
-       // match: (ORconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c|d])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if o2.AuxInt != 32 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c | d
-               return true
-       }
-       // match: (ORconst [c] (ORconst [d] x))
-       // cond:
-       // result: (ORconst [c|d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ORconst {
+               _ = o2.Args[1]
+               x0 := o2.Args[0]
+               if x0.Op != OpARM64MOVWUloadidx {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c | d
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORconst [c1] (ANDconst [c2] x))
-       // cond: c2|c1 == ^0
-       // result: (ORconst [c1] x)
-       for {
-               c1 := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o2.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               c2 := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c2|c1 == ^0) {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c1
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ORshiftLL (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ORconst [c] (SLLconst <x.Type> x [d]))
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if x1.AuxInt != 4 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (ORshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               s := x1.Aux
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL x y:(SLLconst x [c]) [d])
-       // cond: c==d
-       // result: y
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpARM64SLLconst {
+               y2 := o1.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               c := y.AuxInt
-               if x != y.Args[0] {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if !(c == d) {
+               if x2.AuxInt != 5 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
-               return true
-       }
-       // match: (ORshiftLL [c] (SRLconst x [64-c]) x)
-       // cond:
-       // result: (RORconst [64-c] x)
-       for {
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if x2.Aux != s {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
                        break
                }
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               v.reset(OpARM64RORconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x)
-       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
-       // result: (RORWconst [32-c] x)
-       for {
-               t := v.Type
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               if x3.AuxInt != 6 {
                        break
                }
-               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
+               if x3.Aux != s {
                        break
                }
-               v.reset(OpARM64RORWconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL [c] (SRLconst x [64-c]) x2)
-       // cond:
-       // result: (EXTRconst [64-c] x2 x)
-       for {
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               if mem != x3.Args[1] {
                        break
                }
-               x := v_0.Args[0]
-               x2 := v.Args[1]
-               v.reset(OpARM64EXTRconst)
-               v.AuxInt = 64 - c
-               v.AddArg(x2)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL <t> [c] (UBFX [bfc] x) x2)
-       // cond: c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
-       // result: (EXTRWconst [32-c] x2 x)
-       for {
-               t := v.Type
-               c := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               x2 := v.Args[1]
-               if !(c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)) {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               v.reset(OpARM64EXTRWconst)
-               v.AuxInt = 32 - c
-               v.AddArg(x2)
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y))
-       // cond: sc == getARM64BFwidth(bfc)
-       // result: (BFXIL [bfc] y x)
-       for {
-               sc := v.AuxInt
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               if x4.AuxInt != 7 {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if x4.Aux != s {
                        break
                }
-               if v_1.AuxInt != sc {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               y := v_1.Args[0]
-               if !(sc == getARM64BFwidth(bfc)) {
+               if mem != x4.Args[1] {
                        break
                }
-               v.reset(OpARM64BFXIL)
-               v.AuxInt = bfc
-               v.AddArg(y)
-               v.AddArg(x)
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr0)
+               v0.AddArg(idx0)
+               v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx4 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
        for {
                t := v.Type
-               if v.AuxInt != 8 {
+               if v.AuxInt != 56 {
                        break
                }
                _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
+               if o0.AuxInt != 48 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := v.Args[1]
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 40 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 32 {
+                       break
+               }
+               _ = o2.Args[1]
+               x0 := o2.Args[0]
+               if x0.Op != OpARM64MOVWUloadidx4 {
+                       break
+               }
+               _ = x0.Args[2]
+               ptr0 := x0.Args[0]
+               idx0 := x0.Args[1]
+               mem := x0.Args[2]
+               y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -22093,115 +24737,142 @@ func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               if x1.AuxInt != 4 {
                        break
                }
+               s := x1.Aux
                _ = x1.Args[1]
-               if p != x1.Args[0] {
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADDshiftLL {
+                       break
+               }
+               if p1.AuxInt != 2 {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               y2 := o1.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
-               v0.AddArg(v1)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
-       for {
-               t := v.Type
-               if v.AuxInt != 8 {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x2.AuxInt != 5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[1]
+               p := x2.Args[0]
+               if mem != x2.Args[1] {
+                       break
+               }
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
+                       break
+               }
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x3.AuxInt != 6 {
                        break
                }
-               _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               if x3.Aux != s {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               _ = x3.Args[1]
+               if p != x3.Args[0] {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               mem := x0.Args[2]
-               y1 := v.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               if mem != x3.Args[1] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               if x1.AuxInt != 1 {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUload {
                        break
                }
-               s := x1.Aux
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               if x4.AuxInt != 7 {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               if x4.Aux != s {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               _ = x4.Args[1]
+               if p != x4.Args[0] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               if mem != x4.Args[1] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AddArg(ptr0)
-               v0.AddArg(idx0)
+               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, idx0.Type)
+               v1.AuxInt = 2
+               v1.AddArg(idx0)
+               v0.AddArg(v1)
                v0.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
        for {
                t := v.Type
-               if v.AuxInt != 8 {
+               if v.AuxInt != 56 {
                        break
                }
                _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
+               if o0.AuxInt != 48 {
+                       break
+               }
+               _ = o0.Args[1]
+               o1 := o0.Args[0]
+               if o1.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o1.AuxInt != 40 {
+                       break
+               }
+               _ = o1.Args[1]
+               o2 := o1.Args[0]
+               if o2.Op != OpARM64ORshiftLL {
+                       break
+               }
+               if o2.AuxInt != 32 {
+                       break
+               }
+               _ = o2.Args[1]
+               x0 := o2.Args[0]
+               if x0.Op != OpARM64MOVWUloadidx {
                        break
                }
                _ = x0.Args[2]
                ptr := x0.Args[0]
                idx := x0.Args[1]
                mem := x0.Args[2]
-               y1 := v.Args[1]
+               y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -22217,7 +24888,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if x1_1.AuxInt != 1 {
+               if x1_1.AuxInt != 4 {
                        break
                }
                if idx != x1_1.Args[0] {
@@ -22226,202 +24897,185 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               y2 := o1.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
-       // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
-       for {
-               t := v.Type
-               if v.AuxInt != 24 {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if o0.AuxInt != 16 {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUload {
+               if x2_1.AuxInt != 5 {
                        break
                }
-               i0 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               if idx != x2_1.Args[0] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if mem != x2.Args[2] {
                        break
                }
-               i2 := x1.AuxInt
-               if x1.Aux != s {
+               y3 := o0.Args[1]
+               if y3.Op != OpARM64MOVDnop {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               x3 := y3.Args[0]
+               if x3.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if mem != x1.Args[1] {
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
                        break
                }
-               y2 := v.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if x3_1.AuxInt != 6 {
                        break
                }
-               i3 := x2.AuxInt
-               if x2.Aux != s {
+               if idx != x3_1.Args[0] {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if mem != x3.Args[2] {
                        break
                }
-               if mem != x2.Args[1] {
+               y4 := v.Args[1]
+               if y4.Op != OpARM64MOVDnop {
                        break
                }
-               if !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               x4 := y4.Args[0]
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
-               v0.AddArg(v1)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
-       for {
-               t := v.Type
-               if v.AuxInt != 24 {
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64ADDconst {
                        break
                }
-               if o0.AuxInt != 16 {
+               if x4_1.AuxInt != 7 {
                        break
                }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx {
+               if idx != x4_1.Args[0] {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               mem := x0.Args[2]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               if mem != x4.Args[2] {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
-               if x1.AuxInt != 2 {
+               b = mergePoint(b, x0, x1, x2, x3, x4)
+               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               s := x1.Aux
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               y2 := v.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               i1 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 3 {
+               i0 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               if x2.Aux != s {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               _ = x2.Args[1]
-               p := x2.Args[0]
-               if mem != x2.Args[1] {
+               if mem != x1.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
+               v1.AuxInt = i0
+               v1.Aux = s
+               v1.AddArg(p)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftLL_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
-               if v.AuxInt != 24 {
+               if v.AuxInt != 8 {
                        break
                }
                _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if o0.AuxInt != 16 {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUload {
                        break
                }
-               _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx {
+               if x0.AuxInt != 1 {
                        break
                }
-               _ = x0.Args[2]
-               ptr := x0.Args[0]
-               idx := x0.Args[1]
-               mem := x0.Args[2]
-               y1 := o0.Args[1]
+               s := x0.Aux
+               _ = x0.Args[1]
+               p1 := x0.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               mem := x0.Args[1]
+               y1 := v.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
@@ -22430,62 +25084,88 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               ptr0 := x1.Args[0]
+               idx0 := x1.Args[1]
+               if mem != x1.Args[2] {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               if x1_1.AuxInt != 2 {
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
+       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 8 {
                        break
                }
-               if idx != x1_1.Args[0] {
+               _ = v.Args[1]
+               y0 := v.Args[0]
+               if y0.Op != OpARM64MOVDnop {
                        break
                }
-               if mem != x1.Args[2] {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVBUloadidx {
                        break
                }
-               y2 := v.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               if x0_1.AuxInt != 1 {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
+               y1 := v.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if x2_1.AuxInt != 3 {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               if idx != x2_1.Args[0] {
+               if idx != x1.Args[1] {
                        break
                }
-               if mem != x2.Args[2] {
+               if mem != x1.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               b = mergePoint(b, x0, x1)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] x0:(MOVHUloadidx2 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                if v.AuxInt != 24 {
@@ -22500,14 +25180,19 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                _ = o0.Args[1]
-               x0 := o0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx2 {
+               y0 := o0.Args[0]
+               if y0.Op != OpARM64REV16W {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               mem := x0.Args[2]
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVHUload {
+                       break
+               }
+               i2 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
                y1 := o0.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -22516,21 +25201,14 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 2 {
+               i1 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               s := x1.Aux
                _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADDshiftLL {
-                       break
-               }
-               if p1.AuxInt != 1 {
+               if p != x1.Args[0] {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -22542,38 +25220,40 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 3 {
-                       break
-               }
+               i0 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
                _ = x2.Args[1]
-               p := x2.Args[0]
+               if p != x2.Args[0] {
+                       break
+               }
                if mem != x2.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               if !(i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, idx0.Type)
-               v1.AuxInt = 1
-               v1.AddArg(idx0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUload [i0] {s} p mem) y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
-       // cond: i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
-               if v.AuxInt != 56 {
+               if v.AuxInt != 24 {
                        break
                }
                _ = v.Args[1]
@@ -22581,129 +25261,174 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if o0.AuxInt != 48 {
+               if o0.AuxInt != 16 {
                        break
                }
                _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o1.AuxInt != 40 {
-                       break
-               }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               y0 := o0.Args[0]
+               if y0.Op != OpARM64REV16W {
                        break
                }
-               if o2.AuxInt != 32 {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVHUload {
                        break
                }
-               _ = o2.Args[1]
-               x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUload {
+               if x0.AuxInt != 2 {
                        break
                }
-               i0 := x0.AuxInt
                s := x0.Aux
                _ = x0.Args[1]
                p := x0.Args[0]
                mem := x0.Args[1]
-               y1 := o2.Args[1]
+               y1 := o0.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUload {
+                       break
+               }
+               if x1.AuxInt != 1 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[1]
+               p1 := x1.Args[0]
+               if p1.Op != OpARM64ADD {
+                       break
+               }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
+               if mem != x1.Args[1] {
+                       break
+               }
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
+                       break
+               }
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
+                       break
+               }
+               _ = x2.Args[2]
+               ptr0 := x2.Args[0]
+               idx0 := x2.Args[1]
+               if mem != x2.Args[2] {
+                       break
+               }
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
                        break
                }
-               i4 := x1.AuxInt
-               if x1.Aux != s {
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr0)
+               v1.AddArg(idx0)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
+               return true
+       }
+       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
+       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
+       for {
+               t := v.Type
+               if v.AuxInt != 24 {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               _ = v.Args[1]
+               o0 := v.Args[0]
+               if o0.Op != OpARM64ORshiftLL {
                        break
                }
-               if mem != x1.Args[1] {
+               if o0.AuxInt != 16 {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               _ = o0.Args[1]
+               y0 := o0.Args[0]
+               if y0.Op != OpARM64REV16W {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVHUloadidx {
                        break
                }
-               i5 := x2.AuxInt
-               if x2.Aux != s {
+               _ = x0.Args[2]
+               ptr := x0.Args[0]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if x0_1.AuxInt != 2 {
                        break
                }
-               if mem != x2.Args[1] {
+               idx := x0_1.Args[0]
+               mem := x0.Args[2]
+               y1 := o0.Args[1]
+               if y1.Op != OpARM64MOVDnop {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               x1 := y1.Args[0]
+               if x1.Op != OpARM64MOVBUloadidx {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
                        break
                }
-               i6 := x3.AuxInt
-               if x3.Aux != s {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               if x1_1.AuxInt != 1 {
                        break
                }
-               if mem != x3.Args[1] {
+               if idx != x1_1.Args[0] {
                        break
                }
-               y4 := v.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               if mem != x1.Args[2] {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               y2 := v.Args[1]
+               if y2.Op != OpARM64MOVDnop {
                        break
                }
-               i7 := x4.AuxInt
-               if x4.Aux != s {
+               x2 := y2.Args[0]
+               if x2.Op != OpARM64MOVBUloadidx {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
                        break
                }
-               if mem != x4.Args[1] {
+               if idx != x2.Args[1] {
                        break
                }
-               if !(i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if mem != x2.Args[2] {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+                       break
+               }
+               b = mergePoint(b, x0, x1, x2)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.Aux = s
-               v1 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v1.AuxInt = i0
-               v1.AddArg(p)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
+       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -22734,14 +25459,19 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                _ = o2.Args[1]
-               x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUloadidx {
+               y0 := o2.Args[0]
+               if y0.Op != OpARM64REVW {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               mem := x0.Args[2]
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVWUload {
+                       break
+               }
+               i4 := x0.AuxInt
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
                y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -22750,18 +25480,14 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 4 {
+               i3 := x1.AuxInt
+               if x1.Aux != s {
                        break
                }
-               s := x1.Aux
                _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               if p != x1.Args[0] {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -22773,14 +25499,14 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 5 {
-                       break
-               }
+               i2 := x2.AuxInt
                if x2.Aux != s {
                        break
                }
                _ = x2.Args[1]
-               p := x2.Args[0]
+               if p != x2.Args[0] {
+                       break
+               }
                if mem != x2.Args[1] {
                        break
                }
@@ -22792,9 +25518,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != 6 {
-                       break
-               }
+               i1 := x3.AuxInt
                if x3.Aux != s {
                        break
                }
@@ -22813,9 +25537,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x4.Op != OpARM64MOVBUload {
                        break
                }
-               if x4.AuxInt != 7 {
-                       break
-               }
+               i0 := x4.AuxInt
                if x4.Aux != s {
                        break
                }
@@ -22826,21 +25548,26 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if mem != x4.Args[1] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v0.AddArg(idx0)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
+               v1.Aux = s
+               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
+               v2.AuxInt = i0
+               v2.AddArg(p)
+               v1.AddArg(v2)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx4 ptr0 idx0 mem) y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUload [5] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [6] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
+       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -22871,14 +25598,21 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                _ = o2.Args[1]
-               x0 := o2.Args[0]
-               if x0.Op != OpARM64MOVWUloadidx4 {
+               y0 := o2.Args[0]
+               if y0.Op != OpARM64REVW {
                        break
                }
-               _ = x0.Args[2]
-               ptr0 := x0.Args[0]
-               idx0 := x0.Args[1]
-               mem := x0.Args[2]
+               x0 := y0.Args[0]
+               if x0.Op != OpARM64MOVWUload {
+                       break
+               }
+               if x0.AuxInt != 4 {
+                       break
+               }
+               s := x0.Aux
+               _ = x0.Args[1]
+               p := x0.Args[0]
+               mem := x0.Args[1]
                y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
                        break
@@ -22887,21 +25621,16 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1.Op != OpARM64MOVBUload {
                        break
                }
-               if x1.AuxInt != 4 {
+               if x1.AuxInt != 3 {
                        break
                }
-               s := x1.Aux
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADDshiftLL {
+               if x1.Aux != s {
                        break
                }
-               if p1.AuxInt != 2 {
+               _ = x1.Args[1]
+               if p != x1.Args[0] {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
                if mem != x1.Args[1] {
                        break
                }
@@ -22913,14 +25642,16 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2.Op != OpARM64MOVBUload {
                        break
                }
-               if x2.AuxInt != 5 {
+               if x2.AuxInt != 2 {
                        break
                }
                if x2.Aux != s {
                        break
                }
                _ = x2.Args[1]
-               p := x2.Args[0]
+               if p != x2.Args[0] {
+                       break
+               }
                if mem != x2.Args[1] {
                        break
                }
@@ -22932,16 +25663,20 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x3.Op != OpARM64MOVBUload {
                        break
                }
-               if x3.AuxInt != 6 {
+               if x3.AuxInt != 1 {
                        break
                }
                if x3.Aux != s {
                        break
                }
                _ = x3.Args[1]
-               if p != x3.Args[0] {
+               p1 := x3.Args[0]
+               if p1.Op != OpARM64ADD {
                        break
                }
+               _ = p1.Args[1]
+               ptr1 := p1.Args[0]
+               idx1 := p1.Args[1]
                if mem != x3.Args[1] {
                        break
                }
@@ -22950,40 +25685,32 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x4.AuxInt != 7 {
-                       break
-               }
-               if x4.Aux != s {
-                       break
-               }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               if x4.Op != OpARM64MOVBUloadidx {
                        break
                }
-               if mem != x4.Args[1] {
+               _ = x4.Args[2]
+               ptr0 := x4.Args[0]
+               idx0 := x4.Args[1]
+               if mem != x4.Args[2] {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr0)
-               v1 := b.NewValue0(v.Pos, OpARM64SLLconst, idx0.Type)
-               v1.AuxInt = 2
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr0)
                v1.AddArg(idx0)
+               v1.AddArg(mem)
                v0.AddArg(v1)
-               v0.AddArg(mem)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] x0:(MOVWUloadidx ptr idx mem) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
+       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
+       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
        for {
                t := v.Type
                if v.AuxInt != 56 {
@@ -23014,13 +25741,24 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                        break
                }
                _ = o2.Args[1]
-               x0 := o2.Args[0]
+               y0 := o2.Args[0]
+               if y0.Op != OpARM64REVW {
+                       break
+               }
+               x0 := y0.Args[0]
                if x0.Op != OpARM64MOVWUloadidx {
                        break
                }
                _ = x0.Args[2]
                ptr := x0.Args[0]
-               idx := x0.Args[1]
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64ADDconst {
+                       break
+               }
+               if x0_1.AuxInt != 4 {
+                       break
+               }
+               idx := x0_1.Args[0]
                mem := x0.Args[2]
                y1 := o2.Args[1]
                if y1.Op != OpARM64MOVDnop {
@@ -23038,7 +25776,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x1_1.Op != OpARM64ADDconst {
                        break
                }
-               if x1_1.AuxInt != 4 {
+               if x1_1.AuxInt != 3 {
                        break
                }
                if idx != x1_1.Args[0] {
@@ -23063,7 +25801,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x2_1.Op != OpARM64ADDconst {
                        break
                }
-               if x2_1.AuxInt != 5 {
+               if x2_1.AuxInt != 2 {
                        break
                }
                if idx != x2_1.Args[0] {
@@ -23088,7 +25826,7 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if x3_1.Op != OpARM64ADDconst {
                        break
                }
-               if x3_1.AuxInt != 6 {
+               if x3_1.AuxInt != 1 {
                        break
                }
                if idx != x3_1.Args[0] {
@@ -23109,944 +25847,1820 @@ func rewriteValueARM64_OpARM64ORshiftLL_10(v *Value) bool {
                if ptr != x4.Args[0] {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x4_1.AuxInt != 7 {
-                       break
-               }
-               if idx != x4_1.Args[0] {
+               if idx != x4.Args[1] {
                        break
                }
                if mem != x4.Args[2] {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
                        break
                }
                b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
                v.reset(OpCopy)
                v.AddArg(v0)
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
+               v1.AddArg(ptr)
+               v1.AddArg(idx)
+               v1.AddArg(mem)
+               v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftRA_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftRA (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ORconst [c] (SRAconst <x.Type> x [d]))
        for {
-               t := v.Type
-               if v.AuxInt != 8 {
-                       break
-               }
+               d := v.AuxInt
                _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               i1 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := v.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               i0 := x1.AuxInt
-               if x1.Aux != s {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ORshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [c>>uint64(d)])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c >> uint64(d)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftRA x y:(SRAconst x [c]) [d])
+       // cond: c==d
+       // result: y
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SRAconst {
                        break
                }
-               if mem != x1.Args[1] {
+               c := y.AuxInt
+               if x != y.Args[0] {
                        break
                }
-               if !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               if !(c == d) {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVHUload, t)
-               v1.AuxInt = i0
-               v1.Aux = s
-               v1.AddArg(p)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64ORshiftLL_20(v *Value) bool {
+func rewriteValueARM64_OpARM64ORshiftRL_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
+       // match: (ORshiftRL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ORconst [c] (SRLconst <x.Type> x [d]))
        for {
-               t := v.Type
-               if v.AuxInt != 8 {
-                       break
-               }
+               d := v.AuxInt
                _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUload {
-                       break
-               }
-               if x0.AuxInt != 1 {
-                       break
-               }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p1 := x0.Args[0]
-               if p1.Op != OpARM64ADD {
-                       break
-               }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               mem := x0.Args[1]
-               y1 := v.Args[1]
-               if y1.Op != OpARM64MOVDnop {
-                       break
-               }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x1.Args[2]
-               ptr0 := x1.Args[0]
-               idx0 := x1.Args[1]
-               if mem != x1.Args[2] {
-                       break
-               }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
-               v.reset(OpCopy)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
                return true
        }
-       // match: (ORshiftLL <t> [8] y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)
-       // result: @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
+       // match: (ORshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst x [int64(uint64(c)>>uint64(d))])
        for {
-               t := v.Type
-               if v.AuxInt != 8 {
-                       break
-               }
+               d := v.AuxInt
                _ = v.Args[1]
-               y0 := v.Args[0]
-               if y0.Op != OpARM64MOVDnop {
-                       break
-               }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVBUloadidx {
-                       break
-               }
-               _ = x0.Args[2]
-               ptr := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst {
-                       break
-               }
-               if x0_1.AuxInt != 1 {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               idx := x0_1.Args[0]
-               mem := x0.Args[2]
-               y1 := v.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftRL x y:(SRLconst x [c]) [d])
+       // cond: c==d
+       // result: y
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SRLconst {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               c := y.AuxInt
+               if x != y.Args[0] {
                        break
-               }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               }
+               if !(c == d) {
                        break
                }
-               if idx != x1.Args[1] {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORshiftRL [c] (SLLconst x [64-c]) x)
+       // cond:
+       // result: (RORconst [ c] x)
+       for {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               if v_0.AuxInt != 64-c {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0) && clobber(x1) && clobber(y0) && clobber(y1)) {
+               x := v_0.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               b = mergePoint(b, x0, x1)
-               v0 := b.NewValue0(v.Pos, OpARM64REV16W, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVHUloadidx, t)
-               v1.AddArg(ptr)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64RORconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [i2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
+       // cond: c < 32 && t.Size() == 4
+       // result: (RORWconst [c] x)
        for {
                t := v.Type
-               if v.AuxInt != 24 {
+               c := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               if v_0.AuxInt != 32-c {
                        break
                }
-               if o0.AuxInt != 16 {
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
                        break
                }
-               _ = o0.Args[1]
-               y0 := o0.Args[0]
-               if y0.Op != OpARM64REV16W {
+               if x != v_1.Args[0] {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVHUload {
+               if !(c < 32 && t.Size() == 4) {
                        break
                }
-               i2 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               v.reset(OpARM64RORWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
+       // cond: lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
+       // result: (BFI [arm64BFAuxInt(lc-rc, 64-lc)] x y)
+       for {
+               rc := v.AuxInt
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               ac := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               i1 := x1.AuxInt
-               if x1.Aux != s {
+               lc := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(lc > rc && ac == ^((1<<uint(64-lc)-1)<<uint64(lc-rc))) {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               v.reset(OpARM64BFI)
+               v.AuxInt = arm64BFAuxInt(lc-rc, 64-lc)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64RORWconst_0(v *Value) bool {
+       // match: (RORWconst [c] (RORWconst [d] x))
+       // cond:
+       // result: (RORWconst [(c+d)&31] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64RORWconst {
                        break
                }
-               if mem != x1.Args[1] {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64RORWconst)
+               v.AuxInt = (c + d) & 31
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64RORconst_0(v *Value) bool {
+       // match: (RORconst [c] (RORconst [d] x))
+       // cond:
+       // result: (RORconst [(c+d)&63] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64RORconst {
                        break
                }
-               y2 := v.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64RORconst)
+               v.AuxInt = (c + d) & 63
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SLL_0(v *Value) bool {
+       // match: (SLL x (MOVDconst [c]))
+       // cond:
+       // result: (SLLconst x [c&63])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               c := v_1.AuxInt
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SLLconst_0(v *Value) bool {
+       // match: (SLLconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [d<<uint64(c)])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               i0 := x2.AuxInt
-               if x2.Aux != s {
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = d << uint64(c)
+               return true
+       }
+       // match: (SLLconst [c] (SRLconst [c] x))
+       // cond: 0 < c && c < 64
+       // result: (ANDconst [^(1<<uint(c)-1)] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               if v_0.AuxInt != c {
                        break
                }
-               if mem != x2.Args[1] {
+               x := v_0.Args[0]
+               if !(0 < c && c < 64) {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = ^(1<<uint(c) - 1)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SLLconst [sc] (ANDconst [ac] x))
+       // cond: isARM64BFMask(sc, ac, 0)
+       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               ac := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, ac, 0)) {
+                       break
+               }
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, 0))
+               v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUload [2] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
+       // match: (SLLconst [sc] (MOVWUreg x))
+       // cond: isARM64BFMask(sc, 1<<32-1, 0)
+       // result: (UBFIZ [arm64BFAuxInt(sc, 32)] x)
        for {
-               t := v.Type
-               if v.AuxInt != 24 {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVWUreg {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
                        break
                }
-               if o0.AuxInt != 16 {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, 32)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SLLconst [sc] (MOVHUreg x))
+       // cond: isARM64BFMask(sc, 1<<16-1, 0)
+       // result: (UBFIZ [arm64BFAuxInt(sc, 16)] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVHUreg {
                        break
                }
-               _ = o0.Args[1]
-               y0 := o0.Args[0]
-               if y0.Op != OpARM64REV16W {
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVHUload {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, 16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SLLconst [sc] (MOVBUreg x))
+       // cond: isARM64BFMask(sc, 1<<8-1, 0)
+       // result: (UBFIZ [arm64BFAuxInt(sc, 8)] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVBUreg {
                        break
                }
-               if x0.AuxInt != 2 {
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<8-1, 0)) {
                        break
                }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc, 8)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SLLconst [sc] (UBFIZ [bfc] x))
+       // cond: sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
+       // result: (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFIZ {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64) {
                        break
                }
-               if x1.AuxInt != 1 {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SRA_0(v *Value) bool {
+       // match: (SRA x (MOVDconst [c]))
+       // cond:
+       // result: (SRAconst x [c&63])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x1.Aux != s {
+               c := v_1.AuxInt
+               v.reset(OpARM64SRAconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SRAconst_0(v *Value) bool {
+       // match: (SRAconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [d>>uint64(c)])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x1.Args[1]
-               p1 := x1.Args[0]
-               if p1.Op != OpARM64ADD {
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = d >> uint64(c)
+               return true
+       }
+       // match: (SRAconst [rc] (SLLconst [lc] x))
+       // cond: lc > rc
+       // result: (SBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
+       for {
+               rc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x1.Args[1] {
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc > rc) {
                        break
                }
-               y2 := v.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc-rc, 64-lc)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRAconst [rc] (SLLconst [lc] x))
+       // cond: lc <= rc
+       // result: (SBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
+       for {
+               rc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc <= rc) {
                        break
                }
-               _ = x2.Args[2]
-               ptr0 := x2.Args[0]
-               idx0 := x2.Args[1]
-               if mem != x2.Args[2] {
+               v.reset(OpARM64SBFX)
+               v.AuxInt = arm64BFAuxInt(rc-lc, 64-rc)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRAconst [rc] (MOVWreg x))
+       // cond: rc < 32
+       // result: (SBFX [arm64BFAuxInt(rc, 32-rc)] x)
+       for {
+               rc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVWreg {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               x := v_0.Args[0]
+               if !(rc < 32) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64SBFX)
+               v.AuxInt = arm64BFAuxInt(rc, 32-rc)
+               v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL <t> [24] o0:(ORshiftLL [16] y0:(REV16W x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)
-       // result: @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
+       // match: (SRAconst [rc] (MOVHreg x))
+       // cond: rc < 16
+       // result: (SBFX [arm64BFAuxInt(rc, 16-rc)] x)
        for {
-               t := v.Type
-               if v.AuxInt != 24 {
+               rc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVHreg {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               x := v_0.Args[0]
+               if !(rc < 16) {
                        break
                }
-               if o0.AuxInt != 16 {
+               v.reset(OpARM64SBFX)
+               v.AuxInt = arm64BFAuxInt(rc, 16-rc)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRAconst [rc] (MOVBreg x))
+       // cond: rc < 8
+       // result: (SBFX [arm64BFAuxInt(rc, 8-rc)] x)
+       for {
+               rc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVBreg {
                        break
                }
-               _ = o0.Args[1]
-               y0 := o0.Args[0]
-               if y0.Op != OpARM64REV16W {
+               x := v_0.Args[0]
+               if !(rc < 8) {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVHUloadidx {
+               v.reset(OpARM64SBFX)
+               v.AuxInt = arm64BFAuxInt(rc, 8-rc)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRAconst [sc] (SBFIZ [bfc] x))
+       // cond: sc < getARM64BFlsb(bfc)
+       // result: (SBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SBFIZ {
                        break
                }
-               _ = x0.Args[2]
-               ptr := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc < getARM64BFlsb(bfc)) {
                        break
                }
-               if x0_1.AuxInt != 2 {
+               v.reset(OpARM64SBFIZ)
+               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRAconst [sc] (SBFIZ [bfc] x))
+       // cond: sc >= getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
+       // result: (SBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SBFIZ {
                        break
                }
-               idx := x0_1.Args[0]
-               mem := x0.Args[2]
-               y1 := o0.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc >= getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64SBFX)
+               v.AuxInt = arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SRL_0(v *Value) bool {
+       // match: (SRL x (MOVDconst [c]))
+       // cond:
+       // result: (SRLconst x [c&63])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               c := v_1.AuxInt
+               v.reset(OpARM64SRLconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SRLconst_0(v *Value) bool {
+       // match: (SRLconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(uint64(d)>>uint64(c))])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint64(d) >> uint64(c))
+               return true
+       }
+       // match: (SRLconst [c] (SLLconst [c] x))
+       // cond: 0 < c && c < 64
+       // result: (ANDconst [1<<uint(64-c)-1] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               if v_0.AuxInt != c {
                        break
                }
-               if x1_1.AuxInt != 1 {
+               x := v_0.Args[0]
+               if !(0 < c && c < 64) {
                        break
                }
-               if idx != x1_1.Args[0] {
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = 1<<uint(64-c) - 1
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [rc] (SLLconst [lc] x))
+       // cond: lc > rc
+       // result: (UBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
+       for {
+               rc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc > rc) {
                        break
                }
-               y2 := v.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(lc-rc, 64-lc)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [sc] (ANDconst [ac] x))
+       // cond: isARM64BFMask(sc, ac, sc)
+       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ANDconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               ac := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, ac, sc)) {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, sc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [sc] (MOVWUreg x))
+       // cond: isARM64BFMask(sc, 1<<32-1, sc)
+       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVWUreg {
                        break
                }
-               if idx != x2.Args[1] {
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
                        break
                }
-               if mem != x2.Args[2] {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [sc] (MOVHUreg x))
+       // cond: isARM64BFMask(sc, 1<<16-1, sc)
+       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVHUreg {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(o0)) {
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2)
-               v0 := b.NewValue0(v.Pos, OpARM64REVW, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVWUloadidx, t)
-               v1.AddArg(ptr)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
+               v.AddArg(x)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [i4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem))) y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
-       // cond: i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
+       // match: (SRLconst [sc] (MOVBUreg x))
+       // cond: isARM64BFMask(sc, 1<<8-1, sc)
+       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
        for {
-               t := v.Type
-               if v.AuxInt != 56 {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVBUreg {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               x := v_0.Args[0]
+               if !(isARM64BFMask(sc, 1<<8-1, sc)) {
                        break
                }
-               if o0.AuxInt != 48 {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [rc] (SLLconst [lc] x))
+       // cond: lc < rc
+       // result: (UBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
+       for {
+               rc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               lc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(lc < rc) {
                        break
                }
-               if o1.AuxInt != 40 {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(rc-lc, 64-rc)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [sc] (UBFX [bfc] x))
+       // cond: sc < getARM64BFwidth(bfc)
+       // result: (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFX {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc < getARM64BFwidth(bfc)) {
                        break
                }
-               if o2.AuxInt != 32 {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [sc] (UBFIZ [bfc] x))
+       // cond: sc == getARM64BFlsb(bfc)
+       // result: (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFIZ {
                        break
                }
-               _ = o2.Args[1]
-               y0 := o2.Args[0]
-               if y0.Op != OpARM64REVW {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc == getARM64BFlsb(bfc)) {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVWUload {
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = 1<<uint(getARM64BFwidth(bfc)) - 1
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SRLconst_10(v *Value) bool {
+       // match: (SRLconst [sc] (UBFIZ [bfc] x))
+       // cond: sc < getARM64BFlsb(bfc)
+       // result: (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFIZ {
                        break
                }
-               i4 := x0.AuxInt
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc < getARM64BFlsb(bfc)) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SRLconst [sc] (UBFIZ [bfc] x))
+       // cond: sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
+       // result: (UBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
+       for {
+               sc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64UBFIZ {
                        break
                }
-               i3 := x1.AuxInt
-               if x1.Aux != s {
+               bfc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)) {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64STP_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem)
+       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (STP [off1+off2] {sym} ptr val1 val2 mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               if mem != x1.Args[1] {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val1 := v.Args[1]
+               val2 := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+                       break
+               }
+               v.reset(OpARM64STP)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val1)
+               v.AddArg(val2)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
+       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
+       // result: (STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val1 := v.Args[1]
+               val2 := v.Args[2]
+               mem := v.Args[3]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               v.reset(OpARM64STP)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val1)
+               v.AddArg(val2)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVQstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               i2 := x2.AuxInt
-               if x2.Aux != s {
+               if v_1.AuxInt != 0 {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64MOVDconst {
                        break
                }
-               if mem != x2.Args[1] {
+               if v_2.AuxInt != 0 {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               mem := v.Args[3]
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SUB x (MOVDconst [c]))
+       // cond:
+       // result: (SUBconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               c := v_1.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUB a l:(MUL x y))
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUB a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MUL {
                        break
                }
-               i1 := x3.AuxInt
-               if x3.Aux != s {
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
                        break
                }
-               _ = x3.Args[1]
-               if p != x3.Args[0] {
+               v.reset(OpARM64MSUB)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB a l:(MNEG x y))
+       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADD a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MNEG {
                        break
                }
-               if mem != x3.Args[1] {
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
                        break
                }
-               y4 := v.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               v.reset(OpARM64MADD)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB a l:(MULW x y))
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MSUBW a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MULW {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUload {
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
                        break
                }
-               i0 := x4.AuxInt
-               if x4.Aux != s {
+               v.reset(OpARM64MSUBW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB a l:(MNEGW x y))
+       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+       // result: (MADDW a x y)
+       for {
+               _ = v.Args[1]
+               a := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpARM64MNEGW {
                        break
                }
-               _ = x4.Args[1]
-               if p != x4.Args[0] {
+               _ = l.Args[1]
+               x := l.Args[0]
+               y := l.Args[1]
+               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
                        break
                }
-               if mem != x4.Args[1] {
+               v.reset(OpARM64MADDW)
+               v.AddArg(a)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB x x)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
                        break
                }
-               if !(i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SUB x (SUB y z))
+       // cond:
+       // result: (SUB (ADD <v.Type> x z) y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SUB {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-               v.reset(OpCopy)
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               z := v_1.Args[1]
+               v.reset(OpARM64SUB)
+               v0 := b.NewValue0(v.Pos, OpARM64ADD, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(z)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDload, t)
-               v1.Aux = s
-               v2 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
-               v2.AuxInt = i0
-               v2.AddArg(p)
-               v1.AddArg(v2)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.AddArg(y)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUload [4] {s} p mem)) y1:(MOVDnop x1:(MOVBUload [3] {s} p mem))) y2:(MOVDnop x2:(MOVBUload [2] {s} p mem))) y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
-       // cond: s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
+       // match: (SUB (SUB x y) z)
+       // cond:
+       // result: (SUB x (ADD <y.Type> y z))
        for {
-               t := v.Type
-               if v.AuxInt != 56 {
-                       break
-               }
                _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
-                       break
-               }
-               if o0.AuxInt != 48 {
-                       break
-               }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SUB {
                        break
                }
-               if o1.AuxInt != 40 {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               z := v.Args[1]
+               v.reset(OpARM64SUB)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64ADD, y.Type)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SUB x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (SUBshiftLL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               if o2.AuxInt != 32 {
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (SUBshiftRL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               _ = o2.Args[1]
-               y0 := o2.Args[0]
-               if y0.Op != OpARM64REVW {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVWUload {
+               v.reset(OpARM64SUBshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUB_10(v *Value) bool {
+       // match: (SUB x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (SUBshiftRA x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               if x0.AuxInt != 4 {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               s := x0.Aux
-               _ = x0.Args[1]
-               p := x0.Args[0]
-               mem := x0.Args[1]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               v.reset(OpARM64SUBshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUBconst_0(v *Value) bool {
+       // match: (SUBconst [0] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUload {
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBconst [c] (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [d-c])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if x1.AuxInt != 3 {
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = d - c
+               return true
+       }
+       // match: (SUBconst [c] (SUBconst [d] x))
+       // cond:
+       // result: (ADDconst [-c-d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SUBconst {
                        break
                }
-               if x1.Aux != s {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = -c - d
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBconst [c] (ADDconst [d] x))
+       // cond:
+       // result: (ADDconst [-c+d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
                        break
                }
-               _ = x1.Args[1]
-               if p != x1.Args[0] {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = -c + d
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUBshiftLL_0(v *Value) bool {
+       // match: (SUBshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (SUBconst x [int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if mem != x1.Args[1] {
+               c := v_1.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBshiftLL x (SLLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUload {
+               if !(c == d) {
                        break
                }
-               if x2.AuxInt != 2 {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUBshiftRA_0(v *Value) bool {
+       // match: (SUBshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (SUBconst x [c>>uint64(d)])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x2.Aux != s {
+               c := v_1.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = c >> uint64(d)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBshiftRA x (SRAconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
                        break
                }
-               _ = x2.Args[1]
-               if p != x2.Args[0] {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if mem != x2.Args[1] {
+               if !(c == d) {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUBshiftRL_0(v *Value) bool {
+       // match: (SUBshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (SUBconst x [int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUload {
+               c := v_1.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBshiftRL x (SRLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               if x3.AuxInt != 1 {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               if x3.Aux != s {
+               if !(c == d) {
                        break
                }
-               _ = x3.Args[1]
-               p1 := x3.Args[0]
-               if p1.Op != OpARM64ADD {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64TST_0(v *Value) bool {
+       // match: (TST x (MOVDconst [c]))
+       // cond:
+       // result: (TSTconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = p1.Args[1]
-               ptr1 := p1.Args[0]
-               idx1 := p1.Args[1]
-               if mem != x3.Args[1] {
+               c := v_1.AuxInt
+               v.reset(OpARM64TSTconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (TST (MOVDconst [c]) x)
+       // cond:
+       // result: (TSTconst [c] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               y4 := v.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64TSTconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64TSTW_0(v *Value) bool {
+       // match: (TSTW x (MOVDconst [c]))
+       // cond:
+       // result: (TSTWconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUloadidx {
+               c := v_1.AuxInt
+               v.reset(OpARM64TSTWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (TSTW (MOVDconst [c]) x)
+       // cond:
+       // result: (TSTWconst [c] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x4.Args[2]
-               ptr0 := x4.Args[0]
-               idx0 := x4.Args[1]
-               if mem != x4.Args[2] {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64TSTWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64TSTWconst_0(v *Value) bool {
+       // match: (TSTWconst (MOVDconst [x]) [y])
+       // cond: int32(x&y)==0
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if !(s == nil && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1)) && isSamePtr(p1, p) && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               x := v_0.AuxInt
+               if !(int32(x&y) == 0) {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr0)
-               v1.AddArg(idx0)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               v.reset(OpARM64FlagEQ)
                return true
        }
-       // match: (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] y0:(REVW x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem)) y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem))) y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem))) y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem))) y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b,x0,x1,x2,x3,x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)
-       // result: @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
+       // match: (TSTWconst (MOVDconst [x]) [y])
+       // cond: int32(x&y)<0
+       // result: (FlagLT_UGT)
        for {
-               t := v.Type
-               if v.AuxInt != 56 {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = v.Args[1]
-               o0 := v.Args[0]
-               if o0.Op != OpARM64ORshiftLL {
+               x := v_0.AuxInt
+               if !(int32(x&y) < 0) {
                        break
                }
-               if o0.AuxInt != 48 {
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (TSTWconst (MOVDconst [x]) [y])
+       // cond: int32(x&y)>0
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o0.Args[1]
-               o1 := o0.Args[0]
-               if o1.Op != OpARM64ORshiftLL {
+               x := v_0.AuxInt
+               if !(int32(x&y) > 0) {
                        break
                }
-               if o1.AuxInt != 40 {
+               v.reset(OpARM64FlagGT_UGT)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64TSTconst_0(v *Value) bool {
+       // match: (TSTconst (MOVDconst [x]) [y])
+       // cond: int64(x&y)==0
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o1.Args[1]
-               o2 := o1.Args[0]
-               if o2.Op != OpARM64ORshiftLL {
+               x := v_0.AuxInt
+               if !(int64(x&y) == 0) {
                        break
                }
-               if o2.AuxInt != 32 {
+               v.reset(OpARM64FlagEQ)
+               return true
+       }
+       // match: (TSTconst (MOVDconst [x]) [y])
+       // cond: int64(x&y)<0
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = o2.Args[1]
-               y0 := o2.Args[0]
-               if y0.Op != OpARM64REVW {
+               x := v_0.AuxInt
+               if !(int64(x&y) < 0) {
                        break
                }
-               x0 := y0.Args[0]
-               if x0.Op != OpARM64MOVWUloadidx {
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (TSTconst (MOVDconst [x]) [y])
+       // cond: int64(x&y)>0
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x0.Args[2]
-               ptr := x0.Args[0]
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpARM64ADDconst {
+               x := v_0.AuxInt
+               if !(int64(x&y) > 0) {
                        break
                }
-               if x0_1.AuxInt != 4 {
+               v.reset(OpARM64FlagGT_UGT)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64UBFIZ_0(v *Value) bool {
+       // match: (UBFIZ [bfc] (SLLconst [sc] x))
+       // cond: sc < getARM64BFwidth(bfc)
+       // result: (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
+       for {
+               bfc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               idx := x0_1.Args[0]
-               mem := x0.Args[2]
-               y1 := o2.Args[1]
-               if y1.Op != OpARM64MOVDnop {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc < getARM64BFwidth(bfc)) {
                        break
                }
-               x1 := y1.Args[0]
-               if x1.Op != OpARM64MOVBUloadidx {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64UBFX_0(v *Value) bool {
+       // match: (UBFX [bfc] (SRLconst [sc] x))
+       // cond: sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
+       // result: (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
+       for {
+               bfc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
                        break
                }
-               _ = x1.Args[2]
-               if ptr != x1.Args[0] {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64) {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpARM64ADDconst {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (UBFX [bfc] (SLLconst [sc] x))
+       // cond: sc == getARM64BFlsb(bfc)
+       // result: (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
+       for {
+               bfc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if x1_1.AuxInt != 3 {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc == getARM64BFlsb(bfc)) {
                        break
                }
-               if idx != x1_1.Args[0] {
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = 1<<uint(getARM64BFwidth(bfc)) - 1
+               v.AddArg(x)
+               return true
+       }
+       // match: (UBFX [bfc] (SLLconst [sc] x))
+       // cond: sc < getARM64BFlsb(bfc)
+       // result: (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
+       for {
+               bfc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               if mem != x1.Args[2] {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc < getARM64BFlsb(bfc)) {
                        break
                }
-               y2 := o1.Args[1]
-               if y2.Op != OpARM64MOVDnop {
+               v.reset(OpARM64UBFX)
+               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))
+               v.AddArg(x)
+               return true
+       }
+       // match: (UBFX [bfc] (SLLconst [sc] x))
+       // cond: sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
+       // result: (UBFIZ [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
+       for {
+               bfc := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               x2 := y2.Args[0]
-               if x2.Op != OpARM64MOVBUloadidx {
+               sc := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)) {
                        break
                }
-               _ = x2.Args[2]
-               if ptr != x2.Args[0] {
+               v.reset(OpARM64UBFIZ)
+               v.AuxInt = arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64UDIV_0(v *Value) bool {
+       // match: (UDIV x (MOVDconst [1]))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpARM64ADDconst {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if x2_1.AuxInt != 2 {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (UDIV x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SRLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if idx != x2_1.Args[0] {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               if mem != x2.Args[2] {
+               v.reset(OpARM64SRLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (UDIV (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(uint64(c)/uint64(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               y3 := o0.Args[1]
-               if y3.Op != OpARM64MOVDnop {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x3 := y3.Args[0]
-               if x3.Op != OpARM64MOVBUloadidx {
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint64(c) / uint64(d))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64UDIVW_0(v *Value) bool {
+       // match: (UDIVW x (MOVDconst [c]))
+       // cond: uint32(c)==1
+       // result: x
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               _ = x3.Args[2]
-               if ptr != x3.Args[0] {
+               c := v_1.AuxInt
+               if !(uint32(c) == 1) {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpARM64ADDconst {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (UDIVW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c) && is32Bit(c)
+       // result: (SRLconst [log2(c)] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if x3_1.AuxInt != 1 {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c) && is32Bit(c)) {
                        break
                }
-               if idx != x3_1.Args[0] {
+               v.reset(OpARM64SRLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (UDIVW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(uint32(c)/uint32(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if mem != x3.Args[2] {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               y4 := v.Args[1]
-               if y4.Op != OpARM64MOVDnop {
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint32(c) / uint32(d))
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64UMOD_0(v *Value) bool {
+       // match: (UMOD _ (MOVDconst [1]))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x4 := y4.Args[0]
-               if x4.Op != OpARM64MOVBUloadidx {
+               if v_1.AuxInt != 1 {
                        break
                }
-               _ = x4.Args[2]
-               if ptr != x4.Args[0] {
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (UMOD x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (ANDconst [c-1] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if idx != x4.Args[1] {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               if mem != x4.Args[2] {
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c - 1
+               v.AddArg(x)
+               return true
+       }
+       // match: (UMOD (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(uint64(c)%uint64(d))])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1 && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && mergePoint(b, x0, x1, x2, x3, x4) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(o0) && clobber(o1) && clobber(o2)) {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               b = mergePoint(b, x0, x1, x2, x3, x4)
-               v0 := b.NewValue0(v.Pos, OpARM64REV, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpARM64MOVDloadidx, t)
-               v1.AddArg(ptr)
-               v1.AddArg(idx)
-               v1.AddArg(mem)
-               v0.AddArg(v1)
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint64(c) % uint64(d))
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64ORshiftRA_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ORshiftRA (MOVDconst [c]) x [d])
-       // cond:
-       // result: (ORconst [c] (SRAconst <x.Type> x [d]))
+func rewriteValueARM64_OpARM64UMODW_0(v *Value) bool {
+       // match: (UMODW _ (MOVDconst [c]))
+       // cond: uint32(c)==1
+       // result: (MOVDconst [0])
        for {
-               d := v.AuxInt
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SRAconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_1.AuxInt
+               if !(uint32(c) == 1) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (ORshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (ORconst x [c>>uint64(d)])
+       // match: (UMODW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c) && is32Bit(c)
+       // result: (ANDconst [c-1] x)
        for {
-               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
@@ -24054,44 +27668,56 @@ func rewriteValueARM64_OpARM64ORshiftRA_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = c >> uint64(d)
+               if !(isPowerOfTwo(c) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c - 1
                v.AddArg(x)
                return true
        }
-       // match: (ORshiftRA x y:(SRAconst x [c]) [d])
-       // cond: c==d
-       // result: y
+       // match: (UMODW (MOVDconst [c]) (MOVDconst [d]))
+       // cond:
+       // result: (MOVDconst [int64(uint32(c)%uint32(d))])
        for {
-               d := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpARM64SRAconst {
-                       break
-               }
-               c := y.AuxInt
-               if x != y.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               if !(c == d) {
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = int64(uint32(c) % uint32(d))
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64ORshiftRL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (ORshiftRL (MOVDconst [c]) x [d])
+func rewriteValueARM64_OpARM64XOR_0(v *Value) bool {
+       // match: (XOR x (MOVDconst [c]))
+       // cond:
+       // result: (XORconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (XOR (MOVDconst [c]) x)
        // cond:
-       // result: (ORconst [c] (SRLconst <x.Type> x [d]))
+       // result: (XORconst [c] x)
        for {
-               d := v.AuxInt
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -24099,1920 +27725,1098 @@ func rewriteValueARM64_OpARM64ORshiftRL_0(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               v.reset(OpARM64ORconst)
+               v.reset(OpARM64XORconst)
                v.AuxInt = c
-               v0 := b.NewValue0(v.Pos, OpARM64SRLconst, x.Type)
-               v0.AuxInt = d
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (ORshiftRL x (MOVDconst [c]) [d])
+       // match: (XOR x x)
        // cond:
-       // result: (ORconst x [int64(uint64(c)>>uint64(d))])
+       // result: (MOVDconst [0])
        for {
-               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if x != v.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64ORconst)
-               v.AuxInt = int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (ORshiftRL x y:(SRLconst x [c]) [d])
-       // cond: c==d
-       // result: y
+       // match: (XOR x (MVN y))
+       // cond:
+       // result: (EON x y)
        for {
-               d := v.AuxInt
                _ = v.Args[1]
                x := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpARM64SRLconst {
-                       break
-               }
-               c := y.AuxInt
-               if x != y.Args[0] {
-                       break
-               }
-               if !(c == d) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MVN {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
+               y := v_1.Args[0]
+               v.reset(OpARM64EON)
+               v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (ORshiftRL [c] (SLLconst x [64-c]) x)
+       // match: (XOR (MVN y) x)
        // cond:
-       // result: (RORconst [ c] x)
+       // result: (EON x y)
        for {
-               c := v.AuxInt
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64MVN {
                        break
                }
-               if v_0.AuxInt != 64-c {
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64EON)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (XOR x0 x1:(SLLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (XORshiftLL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               x := v_0.Args[0]
-               if x != v.Args[1] {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64RORconst)
+               v.reset(OpARM64XORshiftLL)
                v.AuxInt = c
-               v.AddArg(x)
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x))
-       // cond: c < 32 && t.Size() == 4
-       // result: (RORWconst [c] x)
+       // match: (XOR x1:(SLLconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (XORshiftLL x0 y [c])
        for {
-               t := v.Type
-               c := v.AuxInt
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
-                       break
-               }
-               if v_0.AuxInt != 32-c {
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SLLconst {
                        break
                }
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               if x != v_1.Args[0] {
+               v.reset(OpARM64XORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
+               return true
+       }
+       // match: (XOR x0 x1:(SRLconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (XORshiftRL x0 y [c])
+       for {
+               _ = v.Args[1]
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               if !(c < 32 && t.Size() == 4) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64RORWconst)
+               v.reset(OpARM64XORshiftRL)
                v.AuxInt = c
-               v.AddArg(x)
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
-       // cond: lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
-       // result: (BFI [arm64BFAuxInt(lc-rc, 64-lc)] x y)
+       // match: (XOR x1:(SRLconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (XORshiftRL x0 y [c])
        for {
-               rc := v.AuxInt
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
-                       break
-               }
-               ac := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRLconst {
                        break
                }
-               lc := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(lc > rc && ac == ^((1<<uint(64-lc)-1)<<uint64(lc-rc))) {
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
                        break
                }
-               v.reset(OpARM64BFI)
-               v.AuxInt = arm64BFAuxInt(lc-rc, 64-lc)
-               v.AddArg(x)
+               v.reset(OpARM64XORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x0)
                v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64SLL_0(v *Value) bool {
-       // match: (SLL x (MOVDconst [c]))
-       // cond:
-       // result: (SLLconst x [c&63])
+       // match: (XOR x0 x1:(SRAconst [c] y))
+       // cond: clobberIfDead(x1)
+       // result: (XORshiftRA x0 y [c])
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               x0 := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = c & 63
-               v.AddArg(x)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64XORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64SLLconst_0(v *Value) bool {
-       // match: (SLLconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [d<<uint64(c)])
+func rewriteValueARM64_OpARM64XOR_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (XOR x1:(SRAconst [c] y) x0)
+       // cond: clobberIfDead(x1)
+       // result: (XORshiftRA x0 y [c])
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v.Args[1]
+               x1 := v.Args[0]
+               if x1.Op != OpARM64SRAconst {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = d << uint64(c)
+               c := x1.AuxInt
+               y := x1.Args[0]
+               x0 := v.Args[1]
+               if !(clobberIfDead(x1)) {
+                       break
+               }
+               v.reset(OpARM64XORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x0)
+               v.AddArg(y)
                return true
        }
-       // match: (SLLconst [c] (SRLconst [c] x))
-       // cond: 0 < c && c < 64
-       // result: (ANDconst [^(1<<uint(c)-1)] x)
+       // match: (XOR (SLL x (ANDconst <t> [63] y)) (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x (NEG <t> y))
        for {
-               c := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               if v_0.Op != OpARM64SLL {
                        break
                }
-               if v_0.AuxInt != c {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               x := v_0.Args[0]
-               if !(0 < c && c < 64) {
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = ^(1<<uint(c) - 1)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SLLconst [sc] (ANDconst [ac] x))
-       // cond: isARM64BFMask(sc, ac, 0)
-       // result: (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CSEL0 {
+                       break
+               }
+               if v_1.Type != typ.UInt64 {
+                       break
+               }
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SRL {
+                       break
+               }
+               if v_1_0.Type != typ.UInt64 {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               ac := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, ac, 0)) {
+               if v_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, 0))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SLLconst [sc] (MOVWUreg x))
-       // cond: isARM64BFMask(sc, 1<<32-1, 0)
-       // result: (UBFIZ [arm64BFAuxInt(sc, 32)] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVWUreg {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, 0)) {
+               if v_1_0_1_0.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, 32)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SLLconst [sc] (MOVHUreg x))
-       // cond: isARM64BFMask(sc, 1<<16-1, 0)
-       // result: (UBFIZ [arm64BFAuxInt(sc, 16)] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVHUreg {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, 0)) {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, 16)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SLLconst [sc] (MOVBUreg x))
-       // cond: isARM64BFMask(sc, 1<<8-1, 0)
-       // result: (UBFIZ [arm64BFAuxInt(sc, 8)] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVBUreg {
+               if v_1_0_1_1.AuxInt != 63 {
                        break
                }
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<8-1, 0)) {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc, 8)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SLLconst [sc] (UBFIZ [bfc] x))
-       // cond: sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
-       // result: (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFIZ {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64) {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SRA_0(v *Value) bool {
-       // match: (SRA x (MOVDconst [c]))
-       // cond:
-       // result: (SRAconst x [c&63])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64SRAconst)
-               v.AuxInt = c & 63
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SRAconst_0(v *Value) bool {
-       // match: (SRAconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [d>>uint64(c)])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_1_1_0.Type != t {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = d >> uint64(c)
-               return true
-       }
-       // match: (SRAconst [rc] (SLLconst [lc] x))
-       // cond: lc > rc
-       // result: (SBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
-       for {
-               rc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc > rc) {
+               if v_1_1_0_0.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc-rc, 64-lc)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRAconst [rc] (SLLconst [lc] x))
-       // cond: lc <= rc
-       // result: (SBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
-       for {
-               rc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc <= rc) {
+               if v_1_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64SBFX)
-               v.AuxInt = arm64BFAuxInt(rc-lc, 64-rc)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRAconst [rc] (MOVWreg x))
-       // cond: rc < 32
-       // result: (SBFX [arm64BFAuxInt(rc, 32-rc)] x)
-       for {
-               rc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVWreg {
+               if v_1_1_0_1.AuxInt != 63 {
                        break
                }
-               x := v_0.Args[0]
-               if !(rc < 32) {
+               if y != v_1_1_0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64SBFX)
-               v.AuxInt = arm64BFAuxInt(rc, 32-rc)
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SRAconst [rc] (MOVHreg x))
-       // cond: rc < 16
-       // result: (SBFX [arm64BFAuxInt(rc, 16-rc)] x)
+       // match: (XOR (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))) (SLL x (ANDconst <t> [63] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x (NEG <t> y))
        for {
-               rc := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVHreg {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               x := v_0.Args[0]
-               if !(rc < 16) {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64SBFX)
-               v.AuxInt = arm64BFAuxInt(rc, 16-rc)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRAconst [rc] (MOVBreg x))
-       // cond: rc < 8
-       // result: (SBFX [arm64BFAuxInt(rc, 8-rc)] x)
-       for {
-               rc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVBreg {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SRL {
                        break
                }
-               x := v_0.Args[0]
-               if !(rc < 8) {
+               if v_0_0.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64SBFX)
-               v.AuxInt = arm64BFAuxInt(rc, 8-rc)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRAconst [sc] (SBFIZ [bfc] x))
-       // cond: sc < getARM64BFlsb(bfc)
-       // result: (SBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SBFIZ {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc < getARM64BFlsb(bfc)) {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64SBFIZ)
-               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRAconst [sc] (SBFIZ [bfc] x))
-       // cond: sc >= getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
-       // result: (SBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SBFIZ {
+               if v_0_0_1_0.AuxInt != 64 {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc >= getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)) {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64SBFX)
-               v.AuxInt = arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SRL_0(v *Value) bool {
-       // match: (SRL x (MOVDconst [c]))
-       // cond:
-       // result: (SRLconst x [c&63])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64SRLconst)
-               v.AuxInt = c & 63
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SRLconst_0(v *Value) bool {
-       // match: (SRLconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(uint64(d)>>uint64(c))])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_0_1_1.AuxInt != 63 {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint64(d) >> uint64(c))
-               return true
-       }
-       // match: (SRLconst [c] (SLLconst [c] x))
-       // cond: 0 < c && c < 64
-       // result: (ANDconst [1<<uint(64-c)-1] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               if v_0.AuxInt != c {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               x := v_0.Args[0]
-               if !(0 < c && c < 64) {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = 1<<uint(64-c) - 1
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [rc] (SLLconst [lc] x))
-       // cond: lc > rc
-       // result: (UBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
-       for {
-               rc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0_1_0.Type != t {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc > rc) {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(lc-rc, 64-lc)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [sc] (ANDconst [ac] x))
-       // cond: isARM64BFMask(sc, ac, sc)
-       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ANDconst {
+               if v_0_1_0_0.AuxInt != 64 {
                        break
                }
-               ac := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, ac, sc)) {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(ac, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [sc] (MOVWUreg x))
-       // cond: isARM64BFMask(sc, 1<<32-1, sc)
-       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVWUreg {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<32-1, sc)) {
+               if v_0_1_0_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [sc] (MOVHUreg x))
-       // cond: isARM64BFMask(sc, 1<<16-1, sc)
-       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVHUreg {
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<16-1, sc)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLL {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [sc] (MOVBUreg x))
-       // cond: isARM64BFMask(sc, 1<<8-1, sc)
-       // result: (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVBUreg {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               x := v_0.Args[0]
-               if !(isARM64BFMask(sc, 1<<8-1, sc)) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [rc] (SLLconst [lc] x))
-       // cond: lc < rc
-       // result: (UBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
-       for {
-               rc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_1_1.Type != t {
                        break
                }
-               lc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(lc < rc) {
+               if v_1_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(rc-lc, 64-rc)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [sc] (UBFX [bfc] x))
-       // cond: sc < getARM64BFwidth(bfc)
-       // result: (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFX {
+               if y != v_1_1.Args[0] {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc < getARM64BFwidth(bfc)) {
+               if !(cc.(Op) == OpARM64LessThanU) {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)
+               v.reset(OpARM64ROR)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SRLconst [sc] (UBFIZ [bfc] x))
-       // cond: sc == getARM64BFlsb(bfc)
-       // result: (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
+       // match: (XOR (SRL <typ.UInt64> x (ANDconst <t> [63] y)) (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x y)
        for {
-               sc := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFIZ {
+               if v_0.Op != OpARM64SRL {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc == getARM64BFlsb(bfc)) {
+               if v_0.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = 1<<uint(getARM64BFwidth(bfc)) - 1
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SRLconst_10(v *Value) bool {
-       // match: (SRLconst [sc] (UBFIZ [bfc] x))
-       // cond: sc < getARM64BFlsb(bfc)
-       // result: (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFIZ {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc < getARM64BFlsb(bfc)) {
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SRLconst [sc] (UBFIZ [bfc] x))
-       // cond: sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
-       // result: (UBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
-       for {
-               sc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64UBFIZ {
+               y := v_0_1.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               bfc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)) {
+               if v_1.Type != typ.UInt64 {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64STP_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem)
-       // cond: is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (STP [off1+off2] {sym} ptr val1 val2 mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SLL {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val1 := v.Args[1]
-               val2 := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
                        break
                }
-               v.reset(OpARM64STP)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val1)
-               v.AddArg(val2)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
-       // cond: canMergeSym(sym1,sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
-       // result: (STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val1 := v.Args[1]
-               val2 := v.Args[2]
-               mem := v.Args[3]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)) {
+               if v_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64STP)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val1)
-               v.AddArg(val2)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem)
-       // cond:
-       // result: (MOVQstorezero [off] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               if v_1_0_1_0.AuxInt != 64 {
                        break
                }
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64MOVDconst {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               if v_2.AuxInt != 0 {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               mem := v.Args[3]
-               v.reset(OpARM64MOVQstorezero)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SUB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SUB x (MOVDconst [c]))
-       // cond:
-       // result: (SUBconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_0_1_1.AuxInt != 63 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64SUBconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (SUB a l:(MUL x y))
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
-       // result: (MSUB a x y)
-       for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpARM64MUL {
+               if y != v_1_0_1_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               _ = l.Args[1]
-               x := l.Args[0]
-               y := l.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64MSUB)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (SUB a l:(MNEG x y))
-       // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
-       // result: (MADD a x y)
-       for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpARM64MNEG {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               _ = l.Args[1]
-               x := l.Args[0]
-               y := l.Args[1]
-               if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if v_1_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64MADD)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (SUB a l:(MULW x y))
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
-       // result: (MSUBW a x y)
-       for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpARM64MULW {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               _ = l.Args[1]
-               x := l.Args[0]
-               y := l.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if v_1_1_0_0.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64MSUBW)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (SUB a l:(MNEGW x y))
-       // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
-       // result: (MADDW a x y)
-       for {
-               _ = v.Args[1]
-               a := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpARM64MNEGW {
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               _ = l.Args[1]
-               x := l.Args[0]
-               y := l.Args[1]
-               if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+               if v_1_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64MADDW)
-               v.AddArg(a)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (SUB x x)
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               if v_1_1_0_1.AuxInt != 63 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (SUB x (SUB y z))
-       // cond:
-       // result: (SUB (ADD <v.Type> x z) y)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SUB {
+               if y != v_1_1_0_1.Args[0] {
                        break
                }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               z := v_1.Args[1]
-               v.reset(OpARM64SUB)
-               v0 := b.NewValue0(v.Pos, OpARM64ADD, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(z)
-               v.AddArg(v0)
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
+               v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SUB (SUB x y) z)
-       // cond:
-       // result: (SUB x (ADD <y.Type> y z))
+       // match: (XOR (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))) (CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))) (SRL <typ.UInt64> x (ANDconst <t> [63] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (ROR x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SUB {
+               if v_0.Op != OpARM64CSEL0 {
+                       break
+               }
+               if v_0.Type != typ.UInt64 {
                        break
                }
+               cc := v_0.Aux
                _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               z := v.Args[1]
-               v.reset(OpARM64SUB)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpARM64ADD, y.Type)
-               v0.AddArg(y)
-               v0.AddArg(z)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (SUB x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (SUBshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SLL {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               v.reset(OpARM64SUBshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (SUB x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (SUBshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_0_1_0.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64SUBshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SUB_10(v *Value) bool {
-       // match: (SUB x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (SUBshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               v.reset(OpARM64SUBshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SUBconst_0(v *Value) bool {
-       // match: (SUBconst [0] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               if v_0_0_1_1.AuxInt != 63 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (SUBconst [c] (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [d-c])
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = d - c
-               return true
-       }
-       // match: (SUBconst [c] (SUBconst [d] x))
-       // cond:
-       // result: (ADDconst [-c-d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SUBconst {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = -c - d
-               v.AddArg(x)
-               return true
-       }
-       // match: (SUBconst [c] (ADDconst [d] x))
-       // cond:
-       // result: (ADDconst [-c+d] x)
-       for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = -c + d
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SUBshiftLL_0(v *Value) bool {
-       // match: (SUBshiftLL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (SUBconst x [int64(uint64(c)<<uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0_1_0.Type != t {
+                       break
+               }
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0_1_0_0.AuxInt != 64 {
+                       break
+               }
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_0_1_0_1.Type != t {
+                       break
+               }
+               if v_0_1_0_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64SUBconst)
-               v.AuxInt = int64(uint64(c) << uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SUBshiftLL x (SLLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [0])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64SLLconst {
+               if v_1.Op != OpARM64SRL {
                        break
                }
-               c := v_1.AuxInt
+               if v_1.Type != typ.UInt64 {
+                       break
+               }
+               _ = v_1.Args[1]
                if x != v_1.Args[0] {
                        break
                }
-               if !(c == d) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SUBshiftRA_0(v *Value) bool {
-       // match: (SUBshiftRA x (MOVDconst [c]) [d])
-       // cond:
-       // result: (SUBconst x [c>>uint64(d)])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64SUBconst)
-               v.AuxInt = c >> uint64(d)
+               if v_1_1.AuxInt != 63 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64ROR)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SUBshiftRA x (SRAconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [0])
+       // match: (XOR (SLL x (ANDconst <t> [31] y)) (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x (NEG <t> y))
        for {
-               d := v.AuxInt
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRAconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLL {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               if !(c == d) {
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64SUBshiftRL_0(v *Value) bool {
-       // match: (SUBshiftRL x (MOVDconst [c]) [d])
-       // cond:
-       // result: (SUBconst x [int64(uint64(c)>>uint64(d))])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
+               y := v_0_1.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64SUBconst)
-               v.AuxInt = int64(uint64(c) >> uint64(d))
-               v.AddArg(x)
-               return true
-       }
-       // match: (SUBshiftRL x (SRLconst x [c]) [d])
-       // cond: c==d
-       // result: (MOVDconst [0])
-       for {
-               d := v.AuxInt
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64SRLconst {
+               if v_1.Type != typ.UInt32 {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SRL {
                        break
                }
-               if !(c == d) {
+               if v_1_0.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64TST_0(v *Value) bool {
-       // match: (TST x (MOVDconst [c]))
-       // cond:
-       // result: (TSTconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64TSTconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (TST (MOVDconst [c]) x)
-       // cond:
-       // result: (TSTconst [c] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if x != v_1_0_0.Args[0] {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64TSTconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64TSTW_0(v *Value) bool {
-       // match: (TSTW x (MOVDconst [c]))
-       // cond:
-       // result: (TSTWconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64TSTWconst)
-               v.AuxInt = c
+               if v_1_0_1.Type != t {
+                       break
+               }
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1_0_1_0.AuxInt != 32 {
+                       break
+               }
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_0_1_1.Type != t {
+                       break
+               }
+               if v_1_0_1_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_0_1_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
+                       break
+               }
+               if v_1_1.AuxInt != 64 {
+                       break
+               }
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
+                       break
+               }
+               if v_1_1_0.Type != t {
+                       break
+               }
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1_0_1.Type != t {
+                       break
+               }
+               if v_1_1_0_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (TSTW (MOVDconst [c]) x)
-       // cond:
-       // result: (TSTWconst [c] x)
+       // match: (XOR (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))) (SLL x (ANDconst <t> [31] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x (NEG <t> y))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64TSTWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64TSTWconst_0(v *Value) bool {
-       // match: (TSTWconst (MOVDconst [x]) [y])
-       // cond: int32(x&y)==0
-       // result: (FlagEQ)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x&y) == 0) {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SRL {
                        break
                }
-               v.reset(OpARM64FlagEQ)
-               return true
-       }
-       // match: (TSTWconst (MOVDconst [x]) [y])
-       // cond: int32(x&y)<0
-       // result: (FlagLT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_0.Type != typ.UInt32 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x&y) < 0) {
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               v.reset(OpARM64FlagLT_UGT)
-               return true
-       }
-       // match: (TSTWconst (MOVDconst [x]) [y])
-       // cond: int32(x&y)>0
-       // result: (FlagGT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v_0_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x&y) > 0) {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64FlagGT_UGT)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64TSTconst_0(v *Value) bool {
-       // match: (TSTconst (MOVDconst [x]) [y])
-       // cond: int64(x&y)==0
-       // result: (FlagEQ)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_0_1_0.AuxInt != 32 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x&y) == 0) {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64FlagEQ)
-               return true
-       }
-       // match: (TSTconst (MOVDconst [x]) [y])
-       // cond: int64(x&y)<0
-       // result: (FlagLT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_0_1_1.Type != t {
+                       break
+               }
+               if v_0_0_1_1.AuxInt != 31 {
+                       break
+               }
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
+                       break
+               }
+               if v_0_1.AuxInt != 64 {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
+                       break
+               }
+               if v_0_1_0.Type != t {
+                       break
+               }
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0_1_0_0.AuxInt != 32 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x&y) < 0) {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64FlagLT_UGT)
-               return true
-       }
-       // match: (TSTconst (MOVDconst [x]) [y])
-       // cond: int64(x&y)>0
-       // result: (FlagGT_UGT)
-       for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               x := v_0.AuxInt
-               if !(int64(x&y) > 0) {
+               if v_0_1_0_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64FlagGT_UGT)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64UBFIZ_0(v *Value) bool {
-       // match: (UBFIZ [bfc] (SLLconst [sc] x))
-       // cond: sc < getARM64BFwidth(bfc)
-       // result: (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
-       for {
-               bfc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc < getARM64BFwidth(bfc)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLL {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64UBFX_0(v *Value) bool {
-       // match: (UBFX [bfc] (SRLconst [sc] x))
-       // cond: sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
-       // result: (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
-       for {
-               bfc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SRLconst {
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64) {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))
-               v.AddArg(x)
-               return true
-       }
-       // match: (UBFX [bfc] (SLLconst [sc] x))
-       // cond: sc == getARM64BFlsb(bfc)
-       // result: (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
-       for {
-               bfc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_1_1.Type != t {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc == getARM64BFlsb(bfc)) {
+               if v_1_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = 1<<uint(getARM64BFwidth(bfc)) - 1
-               v.AddArg(x)
-               return true
-       }
-       // match: (UBFX [bfc] (SLLconst [sc] x))
-       // cond: sc < getARM64BFlsb(bfc)
-       // result: (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
-       for {
-               bfc := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if y != v_1_1.Args[0] {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc < getARM64BFlsb(bfc)) {
+               if !(cc.(Op) == OpARM64LessThanU) {
                        break
                }
-               v.reset(OpARM64UBFX)
-               v.AuxInt = arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))
+               v.reset(OpARM64RORW)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, t)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (UBFX [bfc] (SLLconst [sc] x))
-       // cond: sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
-       // result: (UBFIZ [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
+       // match: (XOR (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y)) (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x y)
        for {
-               bfc := v.AuxInt
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64SLLconst {
+               if v_0.Op != OpARM64SRL {
                        break
                }
-               sc := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(sc > getARM64BFlsb(bfc) && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)) {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64UBFIZ)
-               v.AuxInt = arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64UDIV_0(v *Value) bool {
-       // match: (UDIV x (MOVDconst [1]))
-       // cond:
-       // result: x
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64MOVWUreg {
                        break
                }
-               if v_1.AuxInt != 1 {
+               x := v_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (UDIV x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SRLconst [log2(c)] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               t := v_0_1.Type
+               if v_0_1.AuxInt != 31 {
+                       break
+               }
+               y := v_0_1.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if v_1.Type != typ.UInt32 {
                        break
                }
-               v.reset(OpARM64SRLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (UDIV (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(uint64(c)/uint64(d))])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               cc := v_1.Aux
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64SLL {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint64(c) / uint64(d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64UDIVW_0(v *Value) bool {
-       // match: (UDIVW x (MOVDconst [c]))
-       // cond: uint32(c)==1
-       // result: x
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpARM64SUB {
                        break
                }
-               c := v_1.AuxInt
-               if !(uint32(c) == 1) {
+               if v_1_0_1.Type != t {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (UDIVW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c) && is32Bit(c)
-       // result: (SRLconst [log2(c)] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_0_1.Args[1]
+               v_1_0_1_0 := v_1_0_1.Args[0]
+               if v_1_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c) && is32Bit(c)) {
+               if v_1_0_1_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64SRLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (UDIVW (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(uint32(c)/uint32(d))])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_0_1_1 := v_1_0_1.Args[1]
+               if v_1_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_0_1_1.Type != t {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint32(c) / uint32(d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64UMOD_0(v *Value) bool {
-       // match: (UMOD _ (MOVDconst [1]))
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_0_1_1.AuxInt != 31 {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if y != v_1_0_1_1.Args[0] {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (UMOD x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (ANDconst [c-1] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64CMPconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if v_1_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c - 1
-               v.AddArg(x)
-               return true
-       }
-       // match: (UMOD (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(uint64(c)%uint64(d))])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpARM64SUB {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1_0.Type != t {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint64(c) % uint64(d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64UMODW_0(v *Value) bool {
-       // match: (UMODW _ (MOVDconst [c]))
-       // cond: uint32(c)==1
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               _ = v_1_1_0.Args[1]
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1_1_0_0.AuxInt != 32 {
                        break
                }
-               c := v_1.AuxInt
-               if !(uint32(c) == 1) {
+               v_1_1_0_1 := v_1_1_0.Args[1]
+               if v_1_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (UMODW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c) && is32Bit(c)
-       // result: (ANDconst [c-1] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_1_1_0_1.Type != t {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c) && is32Bit(c)) {
+               if v_1_1_0_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64ANDconst)
-               v.AuxInt = c - 1
+               if y != v_1_1_0_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (UMODW (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [int64(uint32(c)%uint32(d))])
+       // match: (XOR (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))) (CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y)))
+       // cond: cc.(Op) == OpARM64LessThanU
+       // result: (RORW x y)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               if v_0.Op != OpARM64CSEL0 {
                        break
                }
-               c := v_0.AuxInt
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               if v_0.Type != typ.UInt32 {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = int64(uint32(c) % uint32(d))
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64XOR_0(v *Value) bool {
-       // match: (XOR x (MOVDconst [c]))
-       // cond:
-       // result: (XORconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               cc := v_0.Aux
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64SLL {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpARM64XORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (XOR (MOVDconst [c]) x)
-       // cond:
-       // result: (XORconst [c] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpARM64SUB {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64XORconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (XOR x x)
-       // cond:
-       // result: (MOVDconst [0])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               t := v_0_0_1.Type
+               _ = v_0_0_1.Args[1]
+               v_0_0_1_0 := v_0_0_1.Args[0]
+               if v_0_0_1_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (XOR x (MVN y))
-       // cond:
-       // result: (EON x y)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MVN {
+               if v_0_0_1_0.AuxInt != 32 {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpARM64EON)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (XOR (MVN y) x)
-       // cond:
-       // result: (EON x y)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MVN {
+               v_0_0_1_1 := v_0_0_1.Args[1]
+               if v_0_0_1_1.Op != OpARM64ANDconst {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpARM64EON)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (XOR x0 x1:(SLLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (XORshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SLLconst {
+               if v_0_0_1_1.Type != t {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_0_1_1.AuxInt != 31 {
                        break
                }
-               v.reset(OpARM64XORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (XOR x1:(SLLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (XORshiftLL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SLLconst {
+               y := v_0_0_1_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpARM64CMPconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if v_0_1.AuxInt != 64 {
                        break
                }
-               v.reset(OpARM64XORshiftLL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (XOR x0 x1:(SRLconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (XORshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRLconst {
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpARM64SUB {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_1_0.Type != t {
                        break
                }
-               v.reset(OpARM64XORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (XOR x1:(SRLconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (XORshiftRL x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRLconst {
+               _ = v_0_1_0.Args[1]
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if v_0_1_0_0.AuxInt != 32 {
                        break
                }
-               v.reset(OpARM64XORshiftRL)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       // match: (XOR x0 x1:(SRAconst [c] y))
-       // cond: clobberIfDead(x1)
-       // result: (XORshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x0 := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpARM64SRAconst {
+               v_0_1_0_1 := v_0_1_0.Args[1]
+               if v_0_1_0_1.Op != OpARM64ANDconst {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               if !(clobberIfDead(x1)) {
+               if v_0_1_0_1.Type != t {
                        break
                }
-               v.reset(OpARM64XORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
-               v.AddArg(y)
-               return true
-       }
-       return false
-}
-func rewriteValueARM64_OpARM64XOR_10(v *Value) bool {
-       // match: (XOR x1:(SRAconst [c] y) x0)
-       // cond: clobberIfDead(x1)
-       // result: (XORshiftRA x0 y [c])
-       for {
-               _ = v.Args[1]
-               x1 := v.Args[0]
-               if x1.Op != OpARM64SRAconst {
+               if v_0_1_0_1.AuxInt != 31 {
                        break
                }
-               c := x1.AuxInt
-               y := x1.Args[0]
-               x0 := v.Args[1]
-               if !(clobberIfDead(x1)) {
+               if y != v_0_1_0_1.Args[0] {
                        break
                }
-               v.reset(OpARM64XORshiftRA)
-               v.AuxInt = c
-               v.AddArg(x0)
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRL {
+                       break
+               }
+               if v_1.Type != typ.UInt32 {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpARM64MOVWUreg {
+                       break
+               }
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpARM64ANDconst {
+                       break
+               }
+               if v_1_1.Type != t {
+                       break
+               }
+               if v_1_1.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1.Args[0] {
+                       break
+               }
+               if !(cc.(Op) == OpARM64LessThanU) {
+                       break
+               }
+               v.reset(OpARM64RORW)
+               v.AddArg(x)
                v.AddArg(y)
                return true
        }
@@ -30534,6 +33338,42 @@ func rewriteValueARM64_OpPopCount64_0(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM64_OpRotateLeft32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (RotateLeft32 x y)
+       // cond:
+       // result: (RORW x (NEG <y.Type> y))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64RORW)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, y.Type)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueARM64_OpRotateLeft64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (RotateLeft64 x y)
+       // cond:
+       // result: (ROR x (NEG <y.Type> y))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpARM64ROR)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpARM64NEG, y.Type)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValueARM64_OpRound_0(v *Value) bool {
        // match: (Round x)
        // cond:
index b8844c518f1a0cf655d0eb9497b084faec62fc2d..28354ed65175ccb6e07fa1a127f9ae72d59637b8 100644 (file)
@@ -195,6 +195,7 @@ func RotateLeft8(n uint8) uint8 {
 
 func RotateLeftVariable(n uint, m int) uint {
        // amd64:"ROLQ"
+       // arm64:"ROR"
        // ppc64:"ROTL"
        // s390x:"RLLG"
        return bits.RotateLeft(n, m)
@@ -202,6 +203,7 @@ func RotateLeftVariable(n uint, m int) uint {
 
 func RotateLeftVariable64(n uint64, m int) uint64 {
        // amd64:"ROLQ"
+       // arm64:"ROR"
        // ppc64:"ROTL"
        // s390x:"RLLG"
        return bits.RotateLeft64(n, m)
@@ -209,6 +211,7 @@ func RotateLeftVariable64(n uint64, m int) uint64 {
 
 func RotateLeftVariable32(n uint32, m int) uint32 {
        // amd64:"ROLL"
+       // arm64:"RORW"
        // ppc64:"ROTLW"
        // s390x:"RLL"
        return bits.RotateLeft32(n, m)