Cypherpunks repositories - gostls13.git/commitdiff
[dev.ssa] cmd/compile, etc.: more ARM64 optimizations, and enable SSA by default
author Cherry Zhang <cherryyz@google.com>
Wed, 10 Aug 2016 17:24:03 +0000 (13:24 -0400)
committer Cherry Zhang <cherryyz@google.com>
Mon, 15 Aug 2016 03:37:34 +0000 (03:37 +0000)
Add more ARM64 optimizations:
- use hardware zero register when it is possible.
- use shifted ops.
  The assembler supports shifted ops, but they are not documented and it
  does not know how to print them. This CL adds both.
- enable fast division.
  This was disabled because it makes the old backend generate slower
  code. But with SSA it generates faster code.

Turn on SSA by default, also adjust tests.

Change-Id: I7794479954c83bb65008dcb457bc1e21d7496da6
Reviewed-on: https://go-review.googlesource.com/26950
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
17 files changed:
src/cmd/asm/internal/asm/operand_test.go
src/cmd/compile/internal/arm64/prog.go
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/gc/walk.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/internal/obj/arm64/a.out.go
src/cmd/internal/obj/link.go
src/cmd/internal/obj/util.go
test/live.go
test/live_ssa.go
test/nilptr3.go
test/nilptr3_ssa.go
test/sliceopt.go

index eafc8a361e489905c5010d59c581310c42d2090b..a8d8f5f34f8a0e502b72026ae0a3de27a10965af 100644 (file)
@@ -17,6 +17,7 @@ import (
 
 func setArch(goarch string) (*arch.Arch, *obj.Link) {
        os.Setenv("GOOS", "linux") // obj can handle this OS for all architectures.
+       os.Setenv("GOARCH", goarch)
        architecture := arch.Set(goarch)
        if architecture == nil {
                panic("asm: unrecognized architecture " + goarch)
index 783a371015762274f57ba15e6fdb6122f65d903d..af43163ece66188e7bec1282ab9246e980a239f3 100644 (file)
@@ -71,6 +71,7 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
        arm64.ACMPW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead},
        arm64.AADC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
        arm64.AROR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
+       arm64.ARORW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
        arm64.AADDS & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
        arm64.ACSET & obj.AMask:  {Flags: gc.SizeQ | gc.RightWrite},
        arm64.ACSEL & obj.AMask:  {Flags: gc.SizeQ | gc.RegRead | gc.RightWrite},
index 24281300aef0cf0fac2ea68d0d3c588243884f86..1f96909716b75054972b301197ab0b47c0baf1d2 100644 (file)
@@ -148,6 +148,24 @@ func storeByType(t ssa.Type) obj.As {
        panic("bad store type")
 }
 
+// makeshift encodes a register shifted by a constant, used as an Offset in Prog
+func makeshift(reg int16, typ int64, s int64) int64 {
+       return int64(reg&31)<<16 | typ | (s&63)<<10
+}
+
+// genshift generates a Prog for r = r0 op (r1 shifted by s)
+func genshift(as obj.As, r0, r1, r int16, typ int64, s int64) *obj.Prog {
+       p := gc.Prog(as)
+       p.From.Type = obj.TYPE_SHIFT
+       p.From.Offset = makeshift(r1, typ, s)
+       p.Reg = r0
+       if r != 0 {
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
+       }
+       return p
+}
+
 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        s.SetLineno(v.Line)
        switch v.Op {
@@ -284,6 +302,27 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.Reg = gc.SSARegNum(v.Args[0])
                p.To.Type = obj.TYPE_REG
                p.To.Reg = gc.SSARegNum(v)
+       case ssa.OpARM64ADDshiftLL,
+               ssa.OpARM64SUBshiftLL,
+               ssa.OpARM64ANDshiftLL,
+               ssa.OpARM64ORshiftLL,
+               ssa.OpARM64XORshiftLL,
+               ssa.OpARM64BICshiftLL:
+               genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LL, v.AuxInt)
+       case ssa.OpARM64ADDshiftRL,
+               ssa.OpARM64SUBshiftRL,
+               ssa.OpARM64ANDshiftRL,
+               ssa.OpARM64ORshiftRL,
+               ssa.OpARM64XORshiftRL,
+               ssa.OpARM64BICshiftRL:
+               genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LR, v.AuxInt)
+       case ssa.OpARM64ADDshiftRA,
+               ssa.OpARM64SUBshiftRA,
+               ssa.OpARM64ANDshiftRA,
+               ssa.OpARM64ORshiftRA,
+               ssa.OpARM64XORshiftRA,
+               ssa.OpARM64BICshiftRA:
+               genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_AR, v.AuxInt)
        case ssa.OpARM64MOVDconst:
                p := gc.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_CONST
@@ -315,6 +354,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Type = obj.TYPE_CONST
                p.From.Offset = v.AuxInt
                p.Reg = gc.SSARegNum(v.Args[0])
+       case ssa.OpARM64CMPshiftLL:
+               genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LL, v.AuxInt)
+       case ssa.OpARM64CMPshiftRL:
+               genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LR, v.AuxInt)
+       case ssa.OpARM64CMPshiftRA:
+               genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_AR, v.AuxInt)
        case ssa.OpARM64MOVDaddr:
                p := gc.Prog(arm64.AMOVD)
                p.From.Type = obj.TYPE_ADDR
@@ -372,6 +417,16 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_MEM
                p.To.Reg = gc.SSARegNum(v.Args[0])
                gc.AddAux(&p.To, v)
+       case ssa.OpARM64MOVBstorezero,
+               ssa.OpARM64MOVHstorezero,
+               ssa.OpARM64MOVWstorezero,
+               ssa.OpARM64MOVDstorezero:
+               p := gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = arm64.REGZERO
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = gc.SSARegNum(v.Args[0])
+               gc.AddAux(&p.To, v)
        case ssa.OpARM64MOVBreg,
                ssa.OpARM64MOVBUreg,
                ssa.OpARM64MOVHreg,
@@ -433,12 +488,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Reg = gc.SSARegNum(v.Args[0])
                p.To.Type = obj.TYPE_REG
                p.To.Reg = gc.SSARegNum(v)
-       case ssa.OpARM64CSELULT:
+       case ssa.OpARM64CSELULT,
+               ssa.OpARM64CSELULT0:
+               r1 := int16(arm64.REGZERO)
+               if v.Op == ssa.OpARM64CSELULT {
+                       r1 = gc.SSARegNum(v.Args[1])
+               }
                p := gc.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
                p.From.Reg = arm64.COND_LO
                p.Reg = gc.SSARegNum(v.Args[0])
-               p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: gc.SSARegNum(v.Args[1])}
+               p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1}
                p.To.Type = obj.TYPE_REG
                p.To.Reg = gc.SSARegNum(v)
        case ssa.OpARM64DUFFZERO:
index 77c20d474f3965f3f0c24ad9aec4c48e3da2cc67..2e7d45d54382e7aaf8fc8f8a21b66c44aeb8d4da 100644 (file)
@@ -40,7 +40,7 @@ func shouldssa(fn *Node) bool {
                if os.Getenv("SSATEST") == "" {
                        return false
                }
-       case "amd64", "amd64p32", "arm", "386":
+       case "amd64", "amd64p32", "arm", "386", "arm64":
                // Generally available.
        }
        if !ssaEnabled {
index c6aeddb172a6e794a4a04df43e775d62b1ed8aa6..1e7d80d3e931c46a6f9efb185610ee58443ab3ff 100644 (file)
@@ -3336,6 +3336,7 @@ func samecheap(a *Node, b *Node) bool {
 // The result of walkrotate MUST be assigned back to n, e.g.
 //     n.Left = walkrotate(n.Left)
 func walkrotate(n *Node) *Node {
+       //TODO: enable LROT on ARM64 once the old backend is gone
        if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM64, sys.PPC64) {
                return n
        }
@@ -3529,16 +3530,6 @@ func walkdiv(n *Node, init *Nodes) *Node {
                        goto ret
                }
 
-               // TODO(zhongwei) Test shows that TUINT8, TINT8, TUINT16 and TINT16's "quick division" method
-               // on current arm64 backend is slower than hardware div instruction on ARM64 due to unnecessary
-               // data movement between registers. It could be enabled when generated code is good enough.
-               if Thearch.LinkArch.Family == sys.ARM64 {
-                       switch Simtype[nl.Type.Etype] {
-                       case TUINT8, TINT8, TUINT16, TINT16:
-                               return n
-                       }
-               }
-
                switch Simtype[nl.Type.Etype] {
                default:
                        return n
index 715bddee388f6ccb8f71a852d3dd35934127eac0..bc215c56b4f98c89e0d3466af1a72927da5c665c 100644 (file)
 (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVDstore [off1+off2] {sym} ptr val mem)
 (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
 (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
+(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBstorezero [off1+off2] {sym} ptr mem)
+(MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVHstorezero [off1+off2] {sym} ptr mem)
+(MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVWstorezero [off1+off2] {sym} ptr mem)
+(MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVDstorezero [off1+off2] {sym} ptr mem)
 
 (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
        (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
        (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+       (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+       (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+       (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+       (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+
+// store zero
+(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
+(MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVHstorezero [off] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVWstorezero [off] {sym} ptr mem)
+(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
 
 // replace load from same location as preceding store with copy
 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
 (FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
 (FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
 
+(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+
 // don't extend after proper load
 (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
 (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
 (MUL _ (MOVDconst [0])) -> (MOVDconst [0])
 (MUL x (MOVDconst [1])) -> x
 (MUL x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 (MUL (MOVDconst [-1]) x) -> (NEG x)
 (MUL (MOVDconst [0]) _) -> (MOVDconst [0])
 (MUL (MOVDconst [1]) x) -> x
 (MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MUL (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MUL (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MUL (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MUL (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MUL (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MUL (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 (MULW x (MOVDconst [c])) && int32(c)==-1 -> (NEG x)
 (MULW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
 (MULW x (MOVDconst [c])) && int32(c)==1 -> x
 (MULW x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 (MULW (MOVDconst [c]) x) && int32(c)==-1 -> (NEG x)
 (MULW (MOVDconst [c]) _) && int32(c)==0 -> (MOVDconst [0])
 (MULW (MOVDconst [c]) x) && int32(c)==1 -> x
 (MULW (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MULW (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MULW (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MULW (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MULW (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MULW (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MULW (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 // div by constant
 (UDIV x (MOVDconst [1])) -> x
 (XOR x x) -> (MOVDconst [0])
 (BIC x x) -> (MOVDconst [0])
 (AND x (MVN y)) -> (BIC x y)
+(CSELULT x (MOVDconst [0]) flag) -> (CSELULT0 x flag)
 
 // remove redundant *const ops
 (ADDconst [0]  x) -> x
 (CSELULT _ y (FlagLT_UGT)) -> y
 (CSELULT x _ (FlagGT_ULT)) -> x
 (CSELULT _ y (FlagGT_UGT)) -> y
+(CSELULT0 _ (FlagEQ)) -> (MOVDconst [0])
+(CSELULT0 x (FlagLT_ULT)) -> x
+(CSELULT0 _ (FlagLT_UGT)) -> (MOVDconst [0])
+(CSELULT0 x (FlagGT_ULT)) -> x
+(CSELULT0 _ (FlagGT_UGT)) -> (MOVDconst [0])
+
+// absorb shifts into ops
+(ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
+(ADD (SLLconst [c] y) x) -> (ADDshiftLL x y [c])
+(ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
+(ADD (SRLconst [c] y) x) -> (ADDshiftRL x y [c])
+(ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
+(ADD (SRAconst [c] y) x) -> (ADDshiftRA x y [c])
+(SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
+(SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
+(SUB x (SRAconst [c] y)) -> (SUBshiftRA x y [c])
+(AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
+(AND (SLLconst [c] y) x) -> (ANDshiftLL x y [c])
+(AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
+(AND (SRLconst [c] y) x) -> (ANDshiftRL x y [c])
+(AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
+(AND (SRAconst [c] y) x) -> (ANDshiftRA x y [c])
+(OR  x (SLLconst [c] y)) -> (ORshiftLL  x y [c])
+(OR  (SLLconst [c] y) x) -> (ORshiftLL  x y [c])
+(OR  x (SRLconst [c] y)) -> (ORshiftRL  x y [c])
+(OR  (SRLconst [c] y) x) -> (ORshiftRL  x y [c])
+(OR  x (SRAconst [c] y)) -> (ORshiftRA  x y [c])
+(OR  (SRAconst [c] y) x) -> (ORshiftRA  x y [c])
+(XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
+(XOR (SLLconst [c] y) x) -> (XORshiftLL x y [c])
+(XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
+(XOR (SRLconst [c] y) x) -> (XORshiftRL x y [c])
+(XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
+(XOR (SRAconst [c] y) x) -> (XORshiftRA x y [c])
+(BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
+(BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
+(BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
+(CMP x (SLLconst [c] y)) -> (CMPshiftLL x y [c])
+(CMP (SLLconst [c] y) x) -> (InvertFlags (CMPshiftLL x y [c]))
+(CMP x (SRLconst [c] y)) -> (CMPshiftRL x y [c])
+(CMP (SRLconst [c] y) x) -> (InvertFlags (CMPshiftRL x y [c]))
+(CMP x (SRAconst [c] y)) -> (CMPshiftRA x y [c])
+(CMP (SRAconst [c] y) x) -> (InvertFlags (CMPshiftRA x y [c]))
+
+// prefer *const ops to *shift ops
+(ADDshiftLL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SLLconst <x.Type> x [d]))
+(ADDshiftRL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRLconst <x.Type> x [d]))
+(ADDshiftRA (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRAconst <x.Type> x [d]))
+(ANDshiftLL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SLLconst <x.Type> x [d]))
+(ANDshiftRL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRLconst <x.Type> x [d]))
+(ANDshiftRA (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRAconst <x.Type> x [d]))
+(ORshiftLL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SLLconst <x.Type> x [d]))
+(ORshiftRL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRLconst <x.Type> x [d]))
+(ORshiftRA  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRAconst <x.Type> x [d]))
+(XORshiftLL (MOVDconst [c]) x [d]) -> (XORconst [c] (SLLconst <x.Type> x [d]))
+(XORshiftRL (MOVDconst [c]) x [d]) -> (XORconst [c] (SRLconst <x.Type> x [d]))
+(XORshiftRA (MOVDconst [c]) x [d]) -> (XORconst [c] (SRAconst <x.Type> x [d]))
+(CMPshiftLL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
+(CMPshiftRL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
+(CMPshiftRA (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
+
+// constant folding in *shift ops
+(ADDshiftLL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)<<uint64(d))])
+(ADDshiftRL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)>>uint64(d))])
+(ADDshiftRA x (MOVDconst [c]) [d]) -> (ADDconst x [int64(int64(c)>>uint64(d))])
+(SUBshiftLL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)<<uint64(d))])
+(SUBshiftRL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)>>uint64(d))])
+(SUBshiftRA x (MOVDconst [c]) [d]) -> (SUBconst x [int64(int64(c)>>uint64(d))])
+(ANDshiftLL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)<<uint64(d))])
+(ANDshiftRL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)>>uint64(d))])
+(ANDshiftRA x (MOVDconst [c]) [d]) -> (ANDconst x [int64(int64(c)>>uint64(d))])
+(ORshiftLL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)<<uint64(d))])
+(ORshiftRL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)>>uint64(d))])
+(ORshiftRA  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(int64(c)>>uint64(d))])
+(XORshiftLL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)<<uint64(d))])
+(XORshiftRL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)>>uint64(d))])
+(XORshiftRA x (MOVDconst [c]) [d]) -> (XORconst x [int64(int64(c)>>uint64(d))])
+(BICshiftLL x (MOVDconst [c]) [d]) -> (BICconst x [int64(uint64(c)<<uint64(d))])
+(BICshiftRL x (MOVDconst [c]) [d]) -> (BICconst x [int64(uint64(c)>>uint64(d))])
+(BICshiftRA x (MOVDconst [c]) [d]) -> (BICconst x [int64(int64(c)>>uint64(d))])
+(CMPshiftLL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)<<uint64(d))])
+(CMPshiftRL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)>>uint64(d))])
+(CMPshiftRA x (MOVDconst [c]) [d]) -> (CMPconst x [int64(int64(c)>>uint64(d))])
+
+// simplification with *shift ops
+(SUBshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(SUBshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(SUBshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(ANDshiftLL x y:(SLLconst x [c]) [d]) && c==d -> y
+(ANDshiftRL x y:(SRLconst x [c]) [d]) && c==d -> y
+(ANDshiftRA x y:(SRAconst x [c]) [d]) && c==d -> y
+(ORshiftLL  x y:(SLLconst x [c]) [d]) && c==d -> y
+(ORshiftRL  x y:(SRLconst x [c]) [d]) && c==d -> y
+(ORshiftRA  x y:(SRAconst x [c]) [d]) && c==d -> y
+(XORshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(XORshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(XORshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(BICshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(BICshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(BICshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
index e30fcd63420a26908dfb7b3768db549159a09c07..b586ec5b57a4c8f685a53450fe0a216c764d3785 100644 (file)
@@ -133,11 +133,11 @@ func init() {
        )
        // Common regInfo
        var (
-               gp01     = regInfo{inputs: nil, outputs: []regMask{gp}}
-               gp11     = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-               gp11sp   = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-               gp1flags = regInfo{inputs: []regMask{gpg}}
-               //gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
+               gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
+               gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
+               gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
+               gp1flags  = regInfo{inputs: []regMask{gpg}}
+               gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
                gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
                gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
                gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
@@ -145,8 +145,9 @@ func init() {
                //gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
                //gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
                //gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-               gpload  = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-               gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
+               gpload   = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
+               gpstore  = regInfo{inputs: []regMask{gpspsbg, gpg}}
+               gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
                //gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
                //gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
                fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
@@ -228,6 +229,29 @@ func init() {
                {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"},                  // arg0 compare to arg1, float32
                {name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
 
+               // shifted ops
+               {name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1<<auxInt
+               {name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, unsigned shift
+               {name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, signed shift
+               {name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1<<auxInt
+               {name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, unsigned shift
+               {name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, signed shift
+               {name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1<<auxInt)
+               {name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), unsigned shift
+               {name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), signed shift
+               {name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1<<auxInt
+               {name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, unsigned shift
+               {name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, signed shift
+               {name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1<<auxInt
+               {name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, unsigned shift
+               {name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, signed shift
+               {name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1<<auxInt)
+               {name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), unsigned shift
+               {name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), signed shift
+               {name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1<<auxInt
+               {name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
+               {name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
+
                // moves
                {name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true},      // 32 low bits of auxint
                {name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
@@ -252,6 +276,11 @@ func init() {
                {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
                {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 
+               {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
+               {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
+               {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
+               {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
+
                // conversions
                {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"},   // move from arg0, sign-extended from byte
                {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, zero-extended from byte
@@ -283,7 +312,8 @@ func init() {
                {name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"},     // float64 -> float32
 
                // conditional instructions
-               {name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
+               {name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"},  // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
+               {name: "CSELULT0", argLength: 2, reg: gp1flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, 0 otherwise, arg1=flags
 
                // function calls
                {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                              // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
index 75381c4246dfc005213c538ccbc2c48c9f64a140..8fa816d0755ff276a3e2627e20d77c315ef62b3d 100644 (file)
@@ -859,6 +859,27 @@ const (
        OpARM64CMNWconst
        OpARM64FCMPS
        OpARM64FCMPD
+       OpARM64ADDshiftLL
+       OpARM64ADDshiftRL
+       OpARM64ADDshiftRA
+       OpARM64SUBshiftLL
+       OpARM64SUBshiftRL
+       OpARM64SUBshiftRA
+       OpARM64ANDshiftLL
+       OpARM64ANDshiftRL
+       OpARM64ANDshiftRA
+       OpARM64ORshiftLL
+       OpARM64ORshiftRL
+       OpARM64ORshiftRA
+       OpARM64XORshiftLL
+       OpARM64XORshiftRL
+       OpARM64XORshiftRA
+       OpARM64BICshiftLL
+       OpARM64BICshiftRL
+       OpARM64BICshiftRA
+       OpARM64CMPshiftLL
+       OpARM64CMPshiftRL
+       OpARM64CMPshiftRA
        OpARM64MOVDconst
        OpARM64FMOVSconst
        OpARM64FMOVDconst
@@ -878,6 +899,10 @@ const (
        OpARM64MOVDstore
        OpARM64FMOVSstore
        OpARM64FMOVDstore
+       OpARM64MOVBstorezero
+       OpARM64MOVHstorezero
+       OpARM64MOVWstorezero
+       OpARM64MOVDstorezero
        OpARM64MOVBreg
        OpARM64MOVBUreg
        OpARM64MOVHreg
@@ -905,6 +930,7 @@ const (
        OpARM64FCVTSD
        OpARM64FCVTDS
        OpARM64CSELULT
+       OpARM64CSELULT0
        OpARM64CALLstatic
        OpARM64CALLclosure
        OpARM64CALLdefer
@@ -10596,6 +10622,312 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "ADDshiftLL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AADD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ADDshiftRL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AADD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ADDshiftRA",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AADD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "SUBshiftLL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ASUB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "SUBshiftRL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ASUB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "SUBshiftRA",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ASUB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ANDshiftLL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AAND,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ANDshiftRL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AAND,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ANDshiftRA",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AAND,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ORshiftLL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AORR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ORshiftRL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AORR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "ORshiftRA",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AORR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "XORshiftLL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AEOR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "XORshiftRL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AEOR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "XORshiftRA",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.AEOR,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "BICshiftLL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ABIC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "BICshiftRL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ABIC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "BICshiftRA",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ABIC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
+       {
+               name:    "CMPshiftLL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ACMP,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+               },
+       },
+       {
+               name:    "CMPshiftRL",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ACMP,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+               },
+       },
+       {
+               name:    "CMPshiftRA",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     arm64.ACMP,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                               {1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+               },
+       },
        {
                name:              "MOVDconst",
                auxType:           auxInt64,
@@ -10845,6 +11177,50 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "MOVBstorezero",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     arm64.AMOVB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+                       },
+               },
+       },
+       {
+               name:    "MOVHstorezero",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     arm64.AMOVH,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+                       },
+               },
+       },
+       {
+               name:    "MOVWstorezero",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     arm64.AMOVW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+                       },
+               },
+       },
+       {
+               name:    "MOVDstorezero",
+               auxType: auxSymOff,
+               argLen:  2,
+               asm:     arm64.AMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+                       },
+               },
+       },
        {
                name:   "MOVBreg",
                argLen: 1,
@@ -11197,6 +11573,19 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "CSELULT0",
+               argLen: 2,
+               asm:    arm64.ACSEL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+                       },
+                       outputs: []outputInfo{
+                               {0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+                       },
+               },
+       },
        {
                name:         "CALLstatic",
                auxType:      auxSymOff,
index 6350d1d6b01aa12d8a0c7bcf709ca0d7a39a6db6..318718d652e5fc1e540846bbc88c6c74ca13def0 100644 (file)
@@ -12,14 +12,32 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64ADD(v, config)
        case OpARM64ADDconst:
                return rewriteValueARM64_OpARM64ADDconst(v, config)
+       case OpARM64ADDshiftLL:
+               return rewriteValueARM64_OpARM64ADDshiftLL(v, config)
+       case OpARM64ADDshiftRA:
+               return rewriteValueARM64_OpARM64ADDshiftRA(v, config)
+       case OpARM64ADDshiftRL:
+               return rewriteValueARM64_OpARM64ADDshiftRL(v, config)
        case OpARM64AND:
                return rewriteValueARM64_OpARM64AND(v, config)
        case OpARM64ANDconst:
                return rewriteValueARM64_OpARM64ANDconst(v, config)
+       case OpARM64ANDshiftLL:
+               return rewriteValueARM64_OpARM64ANDshiftLL(v, config)
+       case OpARM64ANDshiftRA:
+               return rewriteValueARM64_OpARM64ANDshiftRA(v, config)
+       case OpARM64ANDshiftRL:
+               return rewriteValueARM64_OpARM64ANDshiftRL(v, config)
        case OpARM64BIC:
                return rewriteValueARM64_OpARM64BIC(v, config)
        case OpARM64BICconst:
                return rewriteValueARM64_OpARM64BICconst(v, config)
+       case OpARM64BICshiftLL:
+               return rewriteValueARM64_OpARM64BICshiftLL(v, config)
+       case OpARM64BICshiftRA:
+               return rewriteValueARM64_OpARM64BICshiftRA(v, config)
+       case OpARM64BICshiftRL:
+               return rewriteValueARM64_OpARM64BICshiftRL(v, config)
        case OpARM64CMP:
                return rewriteValueARM64_OpARM64CMP(v, config)
        case OpARM64CMPW:
@@ -28,8 +46,16 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64CMPWconst(v, config)
        case OpARM64CMPconst:
                return rewriteValueARM64_OpARM64CMPconst(v, config)
+       case OpARM64CMPshiftLL:
+               return rewriteValueARM64_OpARM64CMPshiftLL(v, config)
+       case OpARM64CMPshiftRA:
+               return rewriteValueARM64_OpARM64CMPshiftRA(v, config)
+       case OpARM64CMPshiftRL:
+               return rewriteValueARM64_OpARM64CMPshiftRL(v, config)
        case OpARM64CSELULT:
                return rewriteValueARM64_OpARM64CSELULT(v, config)
+       case OpARM64CSELULT0:
+               return rewriteValueARM64_OpARM64CSELULT0(v, config)
        case OpARM64DIV:
                return rewriteValueARM64_OpARM64DIV(v, config)
        case OpARM64DIVW:
@@ -74,12 +100,16 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64MOVBreg(v, config)
        case OpARM64MOVBstore:
                return rewriteValueARM64_OpARM64MOVBstore(v, config)
+       case OpARM64MOVBstorezero:
+               return rewriteValueARM64_OpARM64MOVBstorezero(v, config)
        case OpARM64MOVDload:
                return rewriteValueARM64_OpARM64MOVDload(v, config)
        case OpARM64MOVDreg:
                return rewriteValueARM64_OpARM64MOVDreg(v, config)
        case OpARM64MOVDstore:
                return rewriteValueARM64_OpARM64MOVDstore(v, config)
+       case OpARM64MOVDstorezero:
+               return rewriteValueARM64_OpARM64MOVDstorezero(v, config)
        case OpARM64MOVHUload:
                return rewriteValueARM64_OpARM64MOVHUload(v, config)
        case OpARM64MOVHUreg:
@@ -90,6 +120,8 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64MOVHreg(v, config)
        case OpARM64MOVHstore:
                return rewriteValueARM64_OpARM64MOVHstore(v, config)
+       case OpARM64MOVHstorezero:
+               return rewriteValueARM64_OpARM64MOVHstorezero(v, config)
        case OpARM64MOVWUload:
                return rewriteValueARM64_OpARM64MOVWUload(v, config)
        case OpARM64MOVWUreg:
@@ -100,6 +132,8 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64MOVWreg(v, config)
        case OpARM64MOVWstore:
                return rewriteValueARM64_OpARM64MOVWstore(v, config)
+       case OpARM64MOVWstorezero:
+               return rewriteValueARM64_OpARM64MOVWstorezero(v, config)
        case OpARM64MUL:
                return rewriteValueARM64_OpARM64MUL(v, config)
        case OpARM64MULW:
@@ -114,6 +148,12 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64OR(v, config)
        case OpARM64ORconst:
                return rewriteValueARM64_OpARM64ORconst(v, config)
+       case OpARM64ORshiftLL:
+               return rewriteValueARM64_OpARM64ORshiftLL(v, config)
+       case OpARM64ORshiftRA:
+               return rewriteValueARM64_OpARM64ORshiftRA(v, config)
+       case OpARM64ORshiftRL:
+               return rewriteValueARM64_OpARM64ORshiftRL(v, config)
        case OpARM64SLL:
                return rewriteValueARM64_OpARM64SLL(v, config)
        case OpARM64SLLconst:
@@ -130,6 +170,12 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64SUB(v, config)
        case OpARM64SUBconst:
                return rewriteValueARM64_OpARM64SUBconst(v, config)
+       case OpARM64SUBshiftLL:
+               return rewriteValueARM64_OpARM64SUBshiftLL(v, config)
+       case OpARM64SUBshiftRA:
+               return rewriteValueARM64_OpARM64SUBshiftRA(v, config)
+       case OpARM64SUBshiftRL:
+               return rewriteValueARM64_OpARM64SUBshiftRL(v, config)
        case OpARM64UDIV:
                return rewriteValueARM64_OpARM64UDIV(v, config)
        case OpARM64UDIVW:
@@ -142,6 +188,12 @@ func rewriteValueARM64(v *Value, config *Config) bool {
                return rewriteValueARM64_OpARM64XOR(v, config)
        case OpARM64XORconst:
                return rewriteValueARM64_OpARM64XORconst(v, config)
+       case OpARM64XORshiftLL:
+               return rewriteValueARM64_OpARM64XORshiftLL(v, config)
+       case OpARM64XORshiftRA:
+               return rewriteValueARM64_OpARM64XORshiftRA(v, config)
+       case OpARM64XORshiftRL:
+               return rewriteValueARM64_OpARM64XORshiftRL(v, config)
        case OpAdd16:
                return rewriteValueARM64_OpAdd16(v, config)
        case OpAdd32:
@@ -684,6 +736,108 @@ func rewriteValueARM64_OpARM64ADD(v *Value, config *Config) bool {
                v.AddArg(y)
                return true
        }
+       // match: (ADD x (SLLconst [c] y))
+       // cond:
+       // result: (ADDshiftLL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD (SLLconst [c] y) x)
+       // cond:
+       // result: (ADDshiftLL x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD x (SRLconst [c] y))
+       // cond:
+       // result: (ADDshiftRL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ADDshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD (SRLconst [c] y) x)
+       // cond:
+       // result: (ADDshiftRL x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ADDshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD x (SRAconst [c] y))
+       // cond:
+       // result: (ADDshiftRA x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ADDshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ADD (SRAconst [c] y) x)
+       // cond:
+       // result: (ADDshiftRA x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ADDshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64ADDconst(v *Value, config *Config) bool {
@@ -768,6 +922,126 @@ func rewriteValueARM64_OpARM64ADDconst(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64ADDshiftLL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (ADDshiftLL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ADDconst [c] (SLLconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ADDshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ADDconst x [int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ADDshiftRA(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (ADDshiftRA (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ADDconst [c] (SRAconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ADDshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ADDconst x [int64(int64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64(int64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ADDshiftRL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (ADDshiftRL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ADDconst [c] (SRLconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ADDshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ADDconst x [int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64AND(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -829,6 +1103,108 @@ func rewriteValueARM64_OpARM64AND(v *Value, config *Config) bool {
                v.AddArg(y)
                return true
        }
+       // match: (AND x (SLLconst [c] y))
+       // cond:
+       // result: (ANDshiftLL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ANDshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (AND (SLLconst [c] y) x)
+       // cond:
+       // result: (ANDshiftLL x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ANDshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (AND x (SRLconst [c] y))
+       // cond:
+       // result: (ANDshiftRL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ANDshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (AND (SRLconst [c] y) x)
+       // cond:
+       // result: (ANDshiftRL x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ANDshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (AND x (SRAconst [c] y))
+       // cond:
+       // result: (ANDshiftRA x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ANDshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (AND (SRAconst [c] y) x)
+       // cond:
+       // result: (ANDshiftRA x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ANDshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64ANDconst(v *Value, config *Config) bool {
@@ -890,16 +1266,202 @@ func rewriteValueARM64_OpARM64ANDconst(v *Value, config *Config) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64BIC(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64ANDshiftLL(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (BIC x (MOVDconst [c]))
+       // match: (ANDshiftLL (MOVDconst [c]) x [d])
        // cond:
-       // result: (BICconst [c] x)
+       // result: (ANDconst [c] (SLLconst <x.Type> x [d]))
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ANDshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ANDconst x [int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ANDshiftLL y:(SLLconst x [c]) x [d])
+       // cond: c==d (ANDshiftLL a b [d] is a & (b<<d); only (x<<c) & (x<<c) folds to y, so y must be arg0)
+       // result: y
+       for {
+               d := v.AuxInt
+               y := v.Args[0]
+               x := v.Args[1]
+               if y.Op != OpARM64SLLconst {
+                       break
+               }
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ANDshiftRA(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (ANDshiftRA (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ANDconst [c] (SRAconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ANDshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ANDconst x [int64(int64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = int64(int64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ANDshiftRA y:(SRAconst x [c]) x [d])
+       // cond: c==d (ANDshiftRA a b [d] is a & (b>>d); only (x>>c) & (x>>c) folds to y, so y must be arg0)
+       // result: y
+       for {
+               d := v.AuxInt
+               y := v.Args[0]
+               x := v.Args[1]
+               if y.Op != OpARM64SRAconst {
+                       break
+               }
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64ANDshiftRL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (ANDshiftRL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (ANDconst [c] (SRLconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ANDshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ANDconst x [int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ANDconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ANDshiftRL y:(SRLconst x [c]) x [d])
+       // cond: c==d (ANDshiftRL a b [d] is a & (b>>d); only (x>>c) & (x>>c) folds to y, so y must be arg0)
+       // result: y
+       for {
+               d := v.AuxInt
+               y := v.Args[0]
+               x := v.Args[1]
+               if y.Op != OpARM64SRLconst {
+                       break
+               }
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64BIC(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (BIC x (MOVDconst [c]))
+       // cond:
+       // result: (BICconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
@@ -920,6 +1482,57 @@ func rewriteValueARM64_OpARM64BIC(v *Value, config *Config) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (BIC x (SLLconst [c] y))
+       // cond:
+       // result: (BICshiftLL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64BICshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (BIC x (SRLconst [c] y))
+       // cond:
+       // result: (BICshiftRL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64BICshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (BIC x (SRAconst [c] y))
+       // cond:
+       // result: (BICshiftRA x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64BICshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64BICconst(v *Value, config *Config) bool {
@@ -965,102 +1578,336 @@ func rewriteValueARM64_OpARM64BICconst(v *Value, config *Config) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64CMP(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64BICshiftLL(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (CMP x (MOVDconst [c]))
+       // match: (BICshiftLL x (MOVDconst [c]) [d])
        // cond:
-       // result: (CMPconst [c] x)
+       // result: (BICconst x [int64(uint64(c)<<uint64(d))])
        for {
+               d := v.AuxInt
                x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpARM64CMPconst)
-               v.AuxInt = c
+               v.reset(OpARM64BICconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (CMP (MOVDconst [c]) x)
-       // cond:
-       // result: (InvertFlags (CMPconst [c] x))
+       // match: (BICshiftLL (SLLconst x [c]) x [d])
+       // cond: c==d (BICshiftLL a b [d] is a &^ (b<<d); only (x<<c) &^ (x<<c) is known to be 0)
+       // result: (MOVDconst [0])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               d := v.AuxInt
+               x := v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_0.AuxInt
+               if x != v_0.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64CMPW(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64BICshiftRA(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (CMPW x (MOVDconst [c]))
+       // match: (BICshiftRA x (MOVDconst [c]) [d])
        // cond:
-       // result: (CMPWconst [int64(int32(c))] x)
+       // result: (BICconst x [int64(int64(c)>>uint64(d))])
        for {
+               d := v.AuxInt
                x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpARM64CMPWconst)
-               v.AuxInt = int64(int32(c))
+               v.reset(OpARM64BICconst)
+               v.AuxInt = int64(int64(c) >> uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (CMPW (MOVDconst [c]) x)
-       // cond:
-       // result: (InvertFlags (CMPWconst [int64(int32(c))] x))
+       // match: (BICshiftRA (SRAconst x [c]) x [d])
+       // cond: c==d (BICshiftRA a b [d] is a &^ (b>>d); only (x>>c) &^ (x>>c) is known to be 0)
+       // result: (MOVDconst [0])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               d := v.AuxInt
+               x := v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRAconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpARM64InvertFlags)
-               v0 := b.NewValue0(v.Line, OpARM64CMPWconst, TypeFlags)
-               v0.AuxInt = int64(int32(c))
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_0.AuxInt
+               if x != v_0.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64CMPWconst(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64BICshiftRL(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (CMPWconst (MOVDconst [x]) [y])
-       // cond: int32(x)==int32(y)
-       // result: (FlagEQ)
+       // match: (BICshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (BICconst x [int64(uint64(c)>>uint64(d))])
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int32(x) == int32(y)) {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpARM64FlagEQ)
+               c := v_1.AuxInt
+               v.reset(OpARM64BICconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
                return true
        }
-       // match: (CMPWconst (MOVDconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
-       // result: (FlagLT_ULT)
+       // match: (BICshiftRL (SRLconst x [c]) x [d])
+       // cond: c==d (BICshiftRL a b [d] is a &^ (b>>d); only (x>>c) &^ (x>>c) is known to be 0)
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               x := v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               if x != v_0.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64CMP(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMP x (MOVDconst [c]))
+       // cond:
+       // result: (CMPconst [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMP (MOVDconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPconst [c] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (CMP x (SLLconst [c] y))
+       // cond:
+       // result: (CMPshiftLL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64CMPshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMP (SLLconst [c] y) x)
+       // cond:
+       // result: (InvertFlags (CMPshiftLL x y [c]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPshiftLL, TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (CMP x (SRLconst [c] y))
+       // cond:
+       // result: (CMPshiftRL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64CMPshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMP (SRLconst [c] y) x)
+       // cond:
+       // result: (InvertFlags (CMPshiftRL x y [c]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPshiftRL, TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (CMP x (SRAconst [c] y))
+       // cond:
+       // result: (CMPshiftRA x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64CMPshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMP (SRAconst [c] y) x)
+       // cond:
+       // result: (InvertFlags (CMPshiftRA x y [c]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPshiftRA, TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64CMPW(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPW x (MOVDconst [c]))
+       // cond:
+       // result: (CMPWconst [int64(int32(c))] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPWconst)
+               v.AuxInt = int64(int32(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPW (MOVDconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPWconst [int64(int32(c))] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPWconst, TypeFlags)
+               v0.AuxInt = int64(int32(c))
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64CMPWconst(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPWconst (MOVDconst [x]) [y])
+       // cond: int32(x)==int32(y)
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) == int32(y)) {
+                       break
+               }
+               v.reset(OpARM64FlagEQ)
+               return true
+       }
+       // match: (CMPWconst (MOVDconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
+       // result: (FlagLT_ULT)
        for {
                y := v.AuxInt
                v_0 := v.Args[0]
@@ -1316,61 +2163,205 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value, config *Config) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64CSELULT(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64CMPshiftLL(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (CSELULT _ y (FlagEQ))
+       // match: (CMPshiftLL (MOVDconst [c]) x [d])
        // cond:
-       // result: y
+       // result: (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
        for {
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64FlagEQ {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+               v0.AuxInt = c
+               v1 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+               v1.AuxInt = d
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (CSELULT x _ (FlagLT_ULT))
+       // match: (CMPshiftLL x (MOVDconst [c]) [d])
        // cond:
-       // result: x
+       // result: (CMPconst x [int64(uint64(c)<<uint64(d))])
        for {
+               d := v.AuxInt
                x := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64FlagLT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (CSELULT _ y (FlagLT_UGT))
+       return false
+}
+func rewriteValueARM64_OpARM64CMPshiftRA(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPshiftRA (MOVDconst [c]) x [d])
        // cond:
-       // result: y
+       // result: (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
        for {
-               y := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64FlagLT_UGT {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = y.Type
-               v.AddArg(y)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+               v0.AuxInt = c
+               v1 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+               v1.AuxInt = d
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (CSELULT x _ (FlagGT_ULT))
+       // match: (CMPshiftRA x (MOVDconst [c]) [d])
        // cond:
-       // result: x
+       // result: (CMPconst x [int64(int64(c)>>uint64(d))])
        for {
+               d := v.AuxInt
                x := v.Args[0]
-               v_2 := v.Args[2]
-               if v_2.Op != OpARM64FlagGT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               v.reset(OpCopy)
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPconst)
+               v.AuxInt = int64(int64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64CMPshiftRL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPshiftRL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64InvertFlags)
+               v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+               v0.AuxInt = c
+               v1 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+               v1.AuxInt = d
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (CMPshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (CMPconst x [int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64CMPconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64CSELULT(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CSELULT x (MOVDconst [0]) flag)
+       // cond:
+       // result: (CSELULT0 x flag)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               flag := v.Args[2]
+               v.reset(OpARM64CSELULT0)
+               v.AddArg(x)
+               v.AddArg(flag)
+               return true
+       }
+       // match: (CSELULT _ y (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64FlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CSELULT x _ (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CSELULT _ y (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CSELULT x _ (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpARM64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
                v.Type = x.Type
                v.AddArg(x)
                return true
@@ -1391,6 +2382,75 @@ func rewriteValueARM64_OpARM64CSELULT(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64CSELULT0(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CSELULT0 _ (FlagEQ))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FlagEQ {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (CSELULT0 x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CSELULT0 _ (FlagLT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (CSELULT0 x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CSELULT0 _ (FlagGT_UGT))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64DIV(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -2505,6 +3565,27 @@ func rewriteValueARM64_OpARM64MOVBUload(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVBUreg(v *Value, config *Config) bool {
@@ -2619,6 +3700,27 @@ func rewriteValueARM64_OpARM64MOVBload(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVBstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVBreg(v *Value, config *Config) bool {
@@ -2714,6 +3816,28 @@ func rewriteValueARM64_OpARM64MOVBstore(v *Value, config *Config) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVBstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVBstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr x mem)
@@ -2842,12 +3966,12 @@ func rewriteValueARM64_OpARM64MOVBstore(v *Value, config *Config) bool {
        }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64MOVBstorezero(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
        // cond:
-       // result: (MOVDload [off1+off2] {sym} ptr mem)
+       // result: (MOVBstorezero [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -2858,16 +3982,16 @@ func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
                mem := v.Args[1]
-               v.reset(OpARM64MOVDload)
+               v.reset(OpARM64MOVBstorezero)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2)
-       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -2882,43 +4006,113 @@ func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
                if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpARM64MOVDload)
+               v.reset(OpARM64MOVBstorezero)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDstore {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
        return false
 }
-func rewriteValueARM64_OpARM64MOVDreg(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (MOVDreg x)
-       // cond: x.Uses == 1
+       // match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond:
+       // result: (MOVDload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2)
+       // result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDreg(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVDreg x)
+       // cond: x.Uses == 1
        // result: (MOVDnop x)
        for {
                x := v.Args[0]
@@ -2995,6 +4189,77 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value, config *Config) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVDstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezero(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond:
+       // result: (MOVDstorezero [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2)
+       // result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVHUload(v *Value, config *Config) bool {
@@ -3067,6 +4332,27 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVHUreg(v *Value, config *Config) bool {
@@ -3205,6 +4491,27 @@ func rewriteValueARM64_OpARM64MOVHload(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVHstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVHreg(v *Value, config *Config) bool {
@@ -3348,6 +4655,28 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value, config *Config) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVHstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
        // match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
        // cond:
        // result: (MOVHstore [off] {sym} ptr x mem)
@@ -3434,6 +4763,55 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64MOVHstorezero(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond:
+       // result: (MOVHstorezero [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2)
+       // result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64MOVWUload(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -3504,9 +4882,30 @@ func rewriteValueARM64_OpARM64MOVWUload(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MOVWUreg(v *Value, config *Config) bool {
+       // match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWUreg(v *Value, config *Config) bool {
        b := v.Block
        _ = b
        // match: (MOVWUreg x:(MOVBUload _ _))
@@ -3666,6 +5065,27 @@ func rewriteValueARM64_OpARM64MOVWload(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
+       // match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVDconst [0])
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr2 := v_1.Args[0]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVWreg(v *Value, config *Config) bool {
@@ -3831,81 +5251,585 @@ func rewriteValueARM64_OpARM64MOVWstore(v *Value, config *Config) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-       // cond: canMergeSym(sym1,sym2)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       // match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+       // cond: canMergeSym(sym1,sym2)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
+       // cond:
+       // result: (MOVWstorezero [off] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVWUreg {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezero(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+       // cond:
+       // result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1,sym2)
+       // result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDaddr {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (MUL x (MOVDconst [-1]))
+       // cond:
+       // result: (NEG x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL _ (MOVDconst [0]))
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL x (MOVDconst [1]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL x (MOVDconst [c]))
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [-1]) x)
+       // cond:
+       // result: (NEG x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != -1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpARM64NEG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [0]) _)
+       // cond:
+       // result: (MOVDconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MUL (MOVDconst [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && c >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && c >= 3) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && c >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && c >= 7) {
+                       break
+               }
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%7 == 0 && isPowerOfTwo(c/7)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MUL (MOVDconst [c]) x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDaddr {
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
                        break
                }
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+       // match: (MUL   (MOVDconst [c]) (MOVDconst [d]))
        // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
+       // result: (MOVDconst [c*d])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWreg {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               ptr := v.Args[0]
+               c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVWUreg {
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpARM64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               d := v_1.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = c * d
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (MUL x (MOVDconst [-1]))
-       // cond:
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==-1
        // result: (NEG x)
        for {
                x := v.Args[0]
@@ -3913,30 +5837,32 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != -1 {
+               c := v_1.AuxInt
+               if !(int32(c) == -1) {
                        break
                }
                v.reset(OpARM64NEG)
                v.AddArg(x)
                return true
        }
-       // match: (MUL _ (MOVDconst [0]))
-       // cond:
+       // match: (MULW _ (MOVDconst [c]))
+       // cond: int32(c)==0
        // result: (MOVDconst [0])
        for {
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 0 {
+               c := v_1.AuxInt
+               if !(int32(c) == 0) {
                        break
                }
                v.reset(OpARM64MOVDconst)
                v.AuxInt = 0
                return true
        }
-       // match: (MUL x (MOVDconst [1]))
-       // cond:
+       // match: (MULW x (MOVDconst [c]))
+       // cond: int32(c)==1
        // result: x
        for {
                x := v.Args[0]
@@ -3944,7 +5870,8 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               c := v_1.AuxInt
+               if !(int32(c) == 1) {
                        break
                }
                v.reset(OpCopy)
@@ -3952,7 +5879,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL x (MOVDconst [c]))
+       // match: (MULW x (MOVDconst [c]))
        // cond: isPowerOfTwo(c)
        // result: (SLLconst [log2(c)] x)
        for {
@@ -3970,98 +5897,117 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [-1]) x)
-       // cond:
-       // result: (NEG x)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != -1 {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpARM64NEG)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [0]) _)
-       // cond:
-       // result: (MOVDconst [0])
+       // match: (MULW x (MOVDconst [c]))
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 0 {
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
                return true
        }
-       // match: (MUL (MOVDconst [1]) x)
-       // cond:
-       // result: x
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               if v_0.AuxInt != 1 {
+               c := v_1.AuxInt
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               x := v.Args[1]
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MUL (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               c := v_1.AuxInt
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MUL   (MOVDconst [c]) (MOVDconst [d]))
-       // cond:
-       // result: (MOVDconst [c*d])
+       // match: (MULW x (MOVDconst [c]))
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64MOVDconst {
-                       break
-               }
-               c := v_0.AuxInt
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_1.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c * d
+               c := v_1.AuxInt
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
-       b := v.Block
-       _ = b
        // match: (MULW x (MOVDconst [c]))
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
@@ -4069,6 +6015,28 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
                        break
                }
                c := v_1.AuxInt
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: int32(c)==-1
+       // result: (NEG x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(int32(c) == -1) {
                        break
                }
@@ -4076,15 +6044,15 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
                v.AddArg(x)
                return true
        }
-       // match: (MULW _ (MOVDconst [c]))
+       // match: (MULW (MOVDconst [c]) _)
        // cond: int32(c)==0
        // result: (MOVDconst [0])
        for {
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
                if !(int32(c) == 0) {
                        break
                }
@@ -4092,45 +6060,85 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
+       // match: (MULW (MOVDconst [c]) x)
        // cond: int32(c)==1
        // result: x
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(int32(c) == 1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c)
+       // result: (SLLconst [log2(c)] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c-1) && int32(c) >= 3
+       // result: (ADDshiftLL x x [log2(c-1)])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(int32(c) == 1) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c - 1)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (MULW x (MOVDconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: isPowerOfTwo(c+1) && int32(c) >= 7
+       // result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpARM64MOVDconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
                        break
                }
-               v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
+               v.reset(OpARM64ADDshiftLL)
+               v.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
                v.AddArg(x)
                return true
        }
        // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==-1
-       // result: (NEG x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+       // result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -4138,32 +6146,43 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(int32(c) == -1) {
+               if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64NEG)
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 1
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MULW (MOVDconst [c]) _)
-       // cond: int32(c)==0
-       // result: (MOVDconst [0])
+       // match: (MULW (MOVDconst [c]) x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+       // result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
                c := v_0.AuxInt
-               if !(int32(c) == 0) {
+               x := v.Args[1]
+               if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = 0
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 2
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
        // match: (MULW (MOVDconst [c]) x)
-       // cond: int32(c)==1
-       // result: x
+       // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+       // result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -4171,17 +6190,23 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(int32(c) == 1) {
+               if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.reset(OpARM64SLLconst)
+               v.AuxInt = log2(c / 7)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
        // match: (MULW (MOVDconst [c]) x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLLconst [log2(c)] x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+       // result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
@@ -4189,12 +6214,16 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(isPowerOfTwo(c)) {
+               if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
                        break
                }
                v.reset(OpARM64SLLconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+               v0.AuxInt = 3
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
        // match: (MULW  (MOVDconst [c]) (MOVDconst [d]))
@@ -4309,35 +6338,245 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value, config *Config) bool {
        // result: (MOVDconst [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64FlagGT_UGT {
+               if v_0.Op != OpARM64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (NotEqual (InvertFlags x))
+       // cond:
+       // result: (NotEqual x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpARM64NotEqual)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool { // tries the OR rules below in order, rewriting v in place; returns true on the first match, false if none applies
+       b := v.Block
+       _ = b // b is not referenced by any rule in this function; the blank assignment keeps it from being an unused variable
+       // match: (OR  (MOVDconst [c]) x)
+       // cond: (none)
+       // result: (ORconst  [c] x)
+       for { // single-pass loop: break abandons this rule and falls through to the next one
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt // value carried by the constant operand
+               x := v.Args[1]
+               v.reset(OpARM64ORconst) // v becomes an ORconst; its AuxInt and argument are filled in next
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR  x (MOVDconst [c]))
+       // cond: (none); same rule with the constant as the second operand
+       // result: (ORconst  [c] x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR  x x)
+       // cond: (none)
+       // result: x
+       for {
+               x := v.Args[0]
+               if x != v.Args[1] { // only applies when both operands are the same value
+                       break
+               }
+               v.reset(OpCopy) // OR of a value with itself is that value, so v turns into a copy of x
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR  x (SLLconst [c] y))
+       // cond: (none)
+       // result: (ORshiftLL  x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt // shift amount taken from the SLLconst
+               y := v_1.Args[0] // the value that was being shifted
+               v.reset(OpARM64ORshiftLL) // fold the constant left shift of y into the OR as one shifted-operand op
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR  (SLLconst [c] y) x)
+       // cond: (none); operands swapped relative to the previous rule
+       // result: (ORshiftLL  x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x) // the unshifted operand still goes first in the result
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR  x (SRLconst [c] y))
+       // cond: (none)
+       // result: (ORshiftRL  x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftRL) // same folding as the SLLconst rules, for an SRLconst operand
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR  (SRLconst [c] y) x)
+       // cond: (none); operands swapped relative to the previous rule
+       // result: (ORshiftRL  x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x) // the unshifted operand still goes first in the result
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR  x (SRAconst [c] y))
+       // cond: (none)
+       // result: (ORshiftRA  x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64ORshiftRA) // same folding as the SLLconst rules, for an SRAconst operand
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (OR  (SRAconst [c] y) x)
+       // cond: (none); operands swapped relative to the previous rule
+       // result: (ORshiftRA  x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64ORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x) // the unshifted operand still goes first in the result
+               v.AddArg(y)
+               return true
+       }
+       return false // no rule matched; v is left untouched
+}
+func rewriteValueARM64_OpARM64ORconst(v *Value, config *Config) bool { // tries the ORconst rules below in order, rewriting v in place; returns true on the first match, false if none applies
+       b := v.Block
+       _ = b // b is not referenced by any rule in this function; the blank assignment keeps it from being an unused variable
+       // match: (ORconst  [0]  x)
+       // cond: (none)
+       // result: x
+       for { // single-pass loop: break abandons this rule and falls through to the next one
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy) // OR with 0 leaves x unchanged, so v turns into a copy of x
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORconst  [-1] _)
+       // cond: (none)
+       // result: (MOVDconst [-1])
+       for {
+               if v.AuxInt != -1 {
+                       break
+               }
+               v.reset(OpARM64MOVDconst) // OR with all ones is all ones whatever the operand is; no argument is added back
+               v.AuxInt = -1
+               return true
+       }
+       // match: (ORconst  [c] (MOVDconst [d]))
+       // cond: (none)
+       // result: (MOVDconst [c|d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
+               d := v_0.AuxInt
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = 1
+               v.AuxInt = c | d // both operands are constants, so the OR is folded here
                return true
        }
-       // match: (NotEqual (InvertFlags x))
+       // match: (ORconst  [c] (ORconst [d] x))
        // cond:
-       // result: (NotEqual x)
+       // result: (ORconst [c|d] x)
        for {
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpARM64InvertFlags {
+               if v_0.Op != OpARM64ORconst {
                        break
                }
+               d := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpARM64NotEqual)
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c | d // nested OR-with-constant: the two constants merge into a single ORconst on x
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64ORshiftLL(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (OR  (MOVDconst [c]) x)
+       // match: (ORshiftLL  (MOVDconst [c]) x [d])
        // cond:
-       // result: (ORconst  [c] x)
+       // result: (ORconst  [c] (SLLconst <x.Type> x [d]))
        for {
+               d := v.AuxInt
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
@@ -4346,13 +6585,17 @@ func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool {
                x := v.Args[1]
                v.reset(OpARM64ORconst)
                v.AuxInt = c
-               v.AddArg(x)
+               v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (OR  x (MOVDconst [c]))
+       // match: (ORshiftLL  x (MOVDconst [c]) [d])
        // cond:
-       // result: (ORconst  [c] x)
+       // result: (ORconst  x [int64(uint64(c)<<uint64(d))])
        for {
+               d := v.AuxInt
                x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpARM64MOVDconst {
@@ -4360,82 +6603,156 @@ func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool {
                }
                c := v_1.AuxInt
                v.reset(OpARM64ORconst)
-               v.AuxInt = c
+               v.AuxInt = int64(uint64(c) << uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (OR  x x)
-       // cond:
-       // result: x
+       // match: (ORshiftLL  x y:(SLLconst x [c]) [d])
+       // cond: c==d
+       // result: y
        for {
+               d := v.AuxInt
                x := v.Args[0]
-               if x != v.Args[1] {
+               y := v.Args[1]
+               if y.Op != OpARM64SLLconst {
+                       break
+               }
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
                        break
                }
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueARM64_OpARM64ORconst(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64ORshiftRA(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (ORconst  [0]  x)
+       // match: (ORshiftRA  (MOVDconst [c]) x [d])
        // cond:
-       // result: x
+       // result: (ORconst  [c] (SRAconst <x.Type> x [d]))
        for {
-               if v.AuxInt != 0 {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
                        break
                }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (ORshiftRA  x (MOVDconst [c]) [d])
+       // cond:
+       // result: (ORconst  x [int64(int64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64ORconst)
+               v.AuxInt = int64(int64(c) >> uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (ORconst  [-1] _)
-       // cond:
-       // result: (MOVDconst [-1])
+       // match: (ORshiftRA  x y:(SRAconst x [c]) [d])
+       // cond: c==d
+       // result: y
        for {
-               if v.AuxInt != -1 {
+               d := v.AuxInt
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SRAconst {
                        break
                }
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = -1
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (ORconst  [c] (MOVDconst [d]))
+       return false
+}
+func rewriteValueARM64_OpARM64ORshiftRL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (ORshiftRL  (MOVDconst [c]) x [d])
        // cond:
-       // result: (MOVDconst [c|d])
+       // result: (ORconst  [c] (SRLconst <x.Type> x [d]))
        for {
-               c := v.AuxInt
+               d := v.AuxInt
                v_0 := v.Args[0]
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpARM64MOVDconst)
-               v.AuxInt = c | d
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64ORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (ORconst  [c] (ORconst [d] x))
+       // match: (ORshiftRL  x (MOVDconst [c]) [d])
        // cond:
-       // result: (ORconst [c|d] x)
+       // result: (ORconst  x [int64(uint64(c)>>uint64(d))])
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ORconst {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
+               c := v_1.AuxInt
                v.reset(OpARM64ORconst)
-               v.AuxInt = c | d
+               v.AuxInt = int64(uint64(c) >> uint64(d))
                v.AddArg(x)
                return true
        }
+       // match: (ORshiftRL  x y:(SRLconst x [c]) [d])
+       // cond: c==d
+       // result: y
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpARM64SRLconst {
+                       break
+               }
+               c := y.AuxInt
+               if x != y.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64SLL(v *Value, config *Config) bool {
@@ -4585,6 +6902,57 @@ func rewriteValueARM64_OpARM64SUB(v *Value, config *Config) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (SUB x (SLLconst [c] y))
+       // cond:
+       // result: (SUBshiftLL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64SUBshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB x (SRLconst [c] y))
+       // cond:
+       // result: (SUBshiftRL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64SUBshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SUB x (SRAconst [c] y))
+       // cond:
+       // result: (SUBshiftRA x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64SUBshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64SUBconst(v *Value, config *Config) bool {
@@ -4612,41 +6980,167 @@ func rewriteValueARM64_OpARM64SUBconst(v *Value, config *Config) bool {
                if v_0.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_0.AuxInt
+               d := v_0.AuxInt
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = d - c
+               return true
+       }
+       // match: (SUBconst [c] (SUBconst [d] x))
+       // cond:
+       // result: (ADDconst [-c-d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SUBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = -c - d
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBconst [c] (ADDconst [d] x))
+       // cond:
+       // result: (ADDconst [-c+d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64ADDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpARM64ADDconst)
+               v.AuxInt = -c + d
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUBshiftLL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (SUBshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (SUBconst x [int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBshiftLL x (SLLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64SUBshiftRA(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (SUBshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (SUBconst x [int64(int64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64(int64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBshiftRA x (SRAconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
                v.reset(OpARM64MOVDconst)
-               v.AuxInt = d - c
+               v.AuxInt = 0
                return true
        }
-       // match: (SUBconst [c] (SUBconst [d] x))
+       return false
+}
+func rewriteValueARM64_OpARM64SUBshiftRL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (SUBshiftRL x (MOVDconst [c]) [d])
        // cond:
-       // result: (ADDconst [-c-d] x)
+       // result: (SUBconst x [int64(uint64(c)>>uint64(d))])
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64SUBconst {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = -c - d
+               c := v_1.AuxInt
+               v.reset(OpARM64SUBconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
                v.AddArg(x)
                return true
        }
-       // match: (SUBconst [c] (ADDconst [d] x))
-       // cond:
-       // result: (ADDconst [-c+d] x)
+       // match: (SUBshiftRL x (SRLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpARM64ADDconst {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpARM64ADDconst)
-               v.AuxInt = -c + d
-               v.AddArg(x)
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
                return true
        }
        return false
@@ -4926,6 +7420,108 @@ func rewriteValueARM64_OpARM64XOR(v *Value, config *Config) bool {
                v.AuxInt = 0
                return true
        }
+       // match: (XOR x (SLLconst [c] y))
+       // cond:
+       // result: (XORshiftLL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64XORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (XOR (SLLconst [c] y) x)
+       // cond:
+       // result: (XORshiftLL x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64XORshiftLL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (XOR x (SRLconst [c] y))
+       // cond:
+       // result: (XORshiftRL x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64XORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (XOR (SRLconst [c] y) x)
+       // cond:
+       // result: (XORshiftRL x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64XORshiftRL)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (XOR x (SRAconst [c] y))
+       // cond:
+       // result: (XORshiftRA x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               v.reset(OpARM64XORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (XOR (SRAconst [c] y) x)
+       // cond:
+       // result: (XORshiftRA x y [c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_0.AuxInt
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpARM64XORshiftRA)
+               v.AuxInt = c
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64XORconst(v *Value, config *Config) bool {
@@ -4988,6 +7584,189 @@ func rewriteValueARM64_OpARM64XORconst(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64XORshiftLL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (XORshiftLL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (XORconst [c] (SLLconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64XORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (XORshiftLL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (XORconst x [int64(uint64(c)<<uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = int64(uint64(c) << uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORshiftLL x (SLLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64XORshiftRA(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (XORshiftRA (MOVDconst [c]) x [d])
+       // cond:
+       // result: (XORconst [c] (SRAconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64XORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (XORshiftRA x (MOVDconst [c]) [d])
+       // cond:
+       // result: (XORconst x [int64(int64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = int64(int64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORshiftRA x (SRAconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRAconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64XORshiftRL(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (XORshiftRL (MOVDconst [c]) x [d])
+       // cond:
+       // result: (XORconst [c] (SRLconst <x.Type> x [d]))
+       for {
+               d := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpARM64XORconst)
+               v.AuxInt = c
+               v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+               v0.AuxInt = d
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (XORshiftRL x (MOVDconst [c]) [d])
+       // cond:
+       // result: (XORconst x [int64(uint64(c)>>uint64(d))])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64XORconst)
+               v.AuxInt = int64(uint64(c) >> uint64(d))
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORshiftRL x (SRLconst x [c]) [d])
+       // cond: c==d
+       // result: (MOVDconst [0])
+       for {
+               d := v.AuxInt
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64SRLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(c == d) {
+                       break
+               }
+               v.reset(OpARM64MOVDconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpAdd16(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index ad52be66b8dfd880201aa3a845efd06abf5cb67d..9a02452158989790c925bf33cc357173c00fc577 100644 (file)
@@ -714,3 +714,10 @@ const (
        AB  = obj.AJMP
        ABL = obj.ACALL
 )
+
+const (
+       // shift types, pre-shifted to bit 22 of a TYPE_SHIFT offset: LL is <<, LR is >>, AR is ->
+       SHIFT_LL = 0 << 22
+       SHIFT_LR = 1 << 22
+       SHIFT_AR = 2 << 22
+)
index b6861f4c1e572c7b112ea61c2c685928688c1c1d..3c66eecbf0dd803c9d6ea3ddd7e2c2ca0f96318a 100644 (file)
@@ -112,13 +112,17 @@ import (
 //                     val = int32(y)
 //
 //     reg<<shift, reg>>shift, reg->shift, reg@>shift
-//             Shifted register value, for ARM.
+//             Shifted register value, for ARM and ARM64.
 //             In this form, reg must be a register and shift can be a register or an integer constant.
 //             Encoding:
 //                     type = TYPE_SHIFT
+//             On ARM:
 //                     offset = (reg&15) | shifttype<<5 | count
 //                     shifttype = 0, 1, 2, 3 for <<, >>, ->, @>
 //                     count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant.
+//             On ARM64:
+//                     offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10
+//                     shifttype = 0, 1, 2 for <<, >>, ->; count is an integer constant (masked to 0-63)
 //
 //     (reg, reg)
 //             A destination register pair. When used as the last argument of an instruction,
index 18813c35a8ea3ecb173e33204e5511c3d3d5e805..c8f8760af1a6151fcd9d63c1494269d3ac9739ea 100644 (file)
@@ -286,14 +286,23 @@ func Dconv(p *Prog, a *Addr) string {
 
        case TYPE_SHIFT:
                v := int(a.Offset)
-               op := "<<>>->@>"[((v>>5)&3)<<1:]
-               if v&(1<<4) != 0 {
-                       str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
-               } else {
-                       str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
-               }
-               if a.Reg != 0 {
-                       str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
+               ops := "<<>>->@>"
+               switch goarch := Getgoarch(); goarch {
+               case "arm":
+                       op := ops[((v>>5)&3)<<1:]
+                       if v&(1<<4) != 0 {
+                               str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
+                       } else {
+                               str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
+                       }
+                       if a.Reg != 0 {
+                               str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
+                       }
+               case "arm64":
+                       op := ops[((v>>22)&3)<<1:]
+                       str = fmt.Sprintf("R%d%c%c%d", (v>>16)&31, op[0], op[1], (v>>10)&63)
+               default:
+                       panic("TYPE_SHIFT is not supported on " + goarch)
                }
 
        case TYPE_REGREG:
index 78ba498a362771620c8233f364f46efa4424ad26..8675840d0ec0a2f8a7cfde6fa2cab9509ddc31af 100644 (file)
@@ -1,4 +1,4 @@
-// +build !amd64,!arm,!amd64p32,!386
+// +build !amd64,!arm,!amd64p32,!386,!arm64
 // errorcheck -0 -l -live -wb=0
 
 // Copyright 2014 The Go Authors. All rights reserved.
index 4da31c6f4e442d4888172e500f46302cc30053be..881f139a20fe2687400454174e6093f2cd911c0d 100644 (file)
@@ -1,4 +1,4 @@
-// +build amd64 arm amd64p32 386
+// +build amd64 arm amd64p32 386 arm64
 // errorcheck -0 -l -live -wb=0
 
 // Copyright 2014 The Go Authors. All rights reserved.
index 5b174e02273d2d82eaee032d18ed62c96962871a..a81efb7d8e40f11a12ef2ece7b401adc003ed512 100644 (file)
@@ -2,7 +2,7 @@
 // Fails on ppc64x because of incomplete optimization.
 // See issues 9058.
 // Same reason for mips64x and s390x.
-// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32,!386
+// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32,!386,!arm64
 
 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
index 73f888fff102cc98de65a5f465e235df59d2bbb5..0974a84333714df65fe3c8b5ee9a5d38273089fe 100644 (file)
@@ -1,5 +1,5 @@
 // errorcheck -0 -d=nil
-// +build amd64 arm amd64p32 386
+// +build amd64 arm amd64p32 386 arm64
 
 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
index 115f8166f3832090edd1773c3820acdfe760f924..9dc8a4444c030f031faefc95b0e7f9be846e70fe 100644 (file)
@@ -1,4 +1,4 @@
-// +build !amd64,!arm,!amd64p32,!386
+// +build !amd64,!arm,!amd64p32,!386,!arm64
 // errorcheck -0 -d=append,slice
 
 // Copyright 2015 The Go Authors. All rights reserved.