]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile/internal/ssa: combine consecutive BigEndian stores on arm64
authorChad Rosier <mrosier.qdt@qualcommdatacenter.com>
Wed, 28 Feb 2018 18:29:03 +0000 (13:29 -0500)
committerCherry Zhang <cherryyz@google.com>
Thu, 1 Mar 2018 20:29:22 +0000 (20:29 +0000)
This optimization mirrors that which is already implemented for AMD64.  The
optimization specifically targets the binary.BigEndian.PutUint* functions.

encoding-binary results on Amberwing:
name                   old time/op    new time/op    delta
ReadSlice1000Int32s      9.83µs ± 2%    9.78µs ± 1%     ~     (p=0.362 n=9+10)
ReadStruct               5.24µs ± 3%    5.19µs ± 2%     ~     (p=0.285 n=10+10)
ReadInts                 8.35µs ± 8%    8.44µs ± 3%     ~     (p=0.323 n=10+10)
WriteInts                3.38µs ± 3%    3.44µs ±15%     ~     (p=0.921 n=9+10)
WriteSlice1000Int32s     11.4µs ± 6%    10.2µs ± 4%   -9.94%  (p=0.000 n=10+10)
PutUint16                 510ns ±12%     500ns ± 0%     ~     (p=0.586 n=10+7)
PutUint32                 530ns ±15%     490ns ±12%     ~     (p=0.086 n=10+10)
PutUint64                 550ns ± 0%     470ns ± 6%  -14.52%  (p=0.000 n=7+10)
LittleEndianPutUint16     500ns ± 0%     475ns ±16%     ~     (p=0.120 n=7+10)
LittleEndianPutUint32     450ns ± 0%     517ns ±16%  +14.81%  (p=0.004 n=8+9)
LittleEndianPutUint64     550ns ± 0%     485ns ±13%  -11.82%  (p=0.000 n=8+10)
PutUvarint32              685ns ±12%     622ns ± 4%   -9.17%  (p=0.005 n=10+9)
PutUvarint64              735ns ± 9%     711ns ± 9%     ~     (p=0.272 n=10+9)
[Geo mean]               1.47µs         1.42µs        -3.87%

name                   old speed      new speed      delta
ReadSlice1000Int32s     407MB/s ± 2%   409MB/s ± 1%     ~     (p=0.362 n=9+10)
ReadStruct             14.3MB/s ± 3%  14.4MB/s ± 2%     ~     (p=0.250 n=10+10)
ReadInts               3.59MB/s ± 7%  3.56MB/s ± 4%     ~     (p=0.340 n=10+10)
WriteInts              8.87MB/s ± 3%  8.74MB/s ±13%     ~     (p=0.890 n=9+10)
WriteSlice1000Int32s    352MB/s ± 6%   391MB/s ± 4%  +11.03%  (p=0.000 n=10+10)
PutUint16              3.95MB/s ±13%  4.00MB/s ± 0%     ~     (p=0.312 n=10+7)
PutUint32              7.62MB/s ±17%  8.21MB/s ±11%     ~     (p=0.086 n=10+10)
PutUint64              14.6MB/s ± 0%  17.1MB/s ± 6%  +17.28%  (p=0.000 n=7+10)
LittleEndianPutUint16  4.00MB/s ± 0%  4.23MB/s ±18%     ~     (p=0.176 n=7+10)
LittleEndianPutUint32  8.89MB/s ± 0%  7.64MB/s ±20%  -14.05%  (p=0.001 n=8+10)
LittleEndianPutUint64  14.6MB/s ± 0%  16.6MB/s ±12%  +13.86%  (p=0.000 n=8+10)
PutUvarint32           5.86MB/s ±14%  6.44MB/s ± 5%   +9.84%  (p=0.006 n=10+9)
PutUvarint64           10.9MB/s ± 8%  11.3MB/s ± 9%     ~     (p=0.373 n=10+9)
[Geo mean]             14.2MB/s       14.8MB/s        +3.93%

go1 results on Amberwing:
RegexpMatchEasy0_32       254ns ± 0%     254ns ± 0%    ~     (all equal)
RegexpMatchEasy0_1K       547ns ± 0%     547ns ± 0%    ~     (all equal)
RegexpMatchEasy1_32       252ns ± 0%     253ns ± 1%    ~     (p=0.294 n=8+10)
RegexpMatchEasy1_1K       782ns ± 0%     783ns ± 1%    ~     (p=0.529 n=8+9)
RegexpMatchMedium_32      316ns ± 0%     316ns ± 0%    ~     (all equal)
RegexpMatchMedium_1K     51.5µs ± 0%    51.5µs ± 0%    ~     (p=0.645 n=10+9)
RegexpMatchHard_32       2.75µs ± 0%    2.75µs ± 0%    ~     (all equal)
RegexpMatchHard_1K       78.7µs ± 0%    78.7µs ± 0%    ~     (p=0.754 n=10+10)
FmtFprintfEmpty          57.0ns ± 0%    57.0ns ± 0%    ~     (all equal)
FmtFprintfString          111ns ± 0%     111ns ± 0%    ~     (all equal)
FmtFprintfInt             114ns ± 0%     114ns ± 1%    ~     (p=0.065 n=9+10)
FmtFprintfIntInt          182ns ± 0%     178ns ± 0%  -2.20%  (p=0.000 n=10+10)
FmtFprintfPrefixedInt     225ns ± 0%     227ns ± 0%  +0.89%  (p=0.000 n=10+10)
FmtFprintfFloat           307ns ± 0%     307ns ± 0%    ~     (p=1.000 n=9+9)
FmtManyArgs               697ns ± 0%     701ns ± 2%    ~     (p=0.108 n=9+10)
Gzip                      436ms ± 0%     437ms ± 0%  +0.23%  (p=0.000 n=10+8)
HTTPClientServer         88.8µs ± 2%    89.6µs ± 1%  +0.98%  (p=0.019 n=10+10)
JSONEncode               20.1ms ± 1%    20.2ms ± 1%  +0.48%  (p=0.007 n=10+10)
JSONDecode               94.7ms ± 1%    94.1ms ± 0%  -0.62%  (p=0.000 n=10+9)
GobDecode                12.6ms ± 2%    12.6ms ± 1%    ~     (p=0.360 n=10+8)
GobEncode                12.0ms ± 1%    11.9ms ± 1%  -1.34%  (p=0.000 n=10+10)
Mandelbrot200            5.05ms ± 0%    5.05ms ± 0%  +0.12%  (p=0.000 n=10+10)
TimeParse                 448ns ± 0%     448ns ± 0%    ~     (p=0.529 n=8+9)
TimeFormat                501ns ± 1%     501ns ± 1%    ~     (p=1.000 n=10+9)
Template                 90.6ms ± 0%    89.1ms ± 0%  -1.67%  (p=0.000 n=9+9)
GoParse                  6.01ms ± 0%    5.96ms ± 0%  -0.83%  (p=0.000 n=10+9)
BinaryTree17              11.7s ± 0%     11.7s ± 0%    ~     (p=0.481 n=10+10)
Revcomp                   675ms ± 0%     675ms ± 0%    ~     (p=0.436 n=9+9)
Fannkuch11                3.26s ± 0%     3.27s ± 1%  +0.57%  (p=0.000 n=10+10)
[Geo mean]               67.4µs         67.3µs       -0.10%

name                   old speed      new speed      delta
RegexpMatchEasy0_32     126MB/s ± 0%   126MB/s ± 0%    ~     (p=0.353 n=10+7)
RegexpMatchEasy0_1K    1.87GB/s ± 0%  1.87GB/s ± 0%    ~     (p=0.275 n=8+10)
RegexpMatchEasy1_32     127MB/s ± 0%   126MB/s ± 1%    ~     (p=0.110 n=8+10)
RegexpMatchEasy1_1K    1.31GB/s ± 0%  1.31GB/s ± 1%    ~     (p=0.079 n=8+10)
RegexpMatchMedium_32   3.16MB/s ± 0%  3.16MB/s ± 0%    ~     (all equal)
RegexpMatchMedium_1K   19.9MB/s ± 0%  19.9MB/s ± 0%    ~     (p=0.889 n=10+9)
RegexpMatchHard_32     11.7MB/s ± 0%  11.7MB/s ± 0%    ~     (all equal)
RegexpMatchHard_1K     13.0MB/s ± 0%  13.0MB/s ± 0%    ~     (p=1.000 n=10+10)
Gzip                   44.5MB/s ± 0%  44.4MB/s ± 0%  -0.22%  (p=0.000 n=10+8)
JSONEncode             96.6MB/s ± 1%  96.1MB/s ± 1%  -0.48%  (p=0.007 n=10+10)
JSONDecode             20.5MB/s ± 1%  20.6MB/s ± 0%  +0.63%  (p=0.000 n=10+9)
GobDecode              61.0MB/s ± 2%  61.1MB/s ± 1%    ~     (p=0.372 n=10+8)
GobEncode              63.8MB/s ± 1%  64.7MB/s ± 1%  +1.36%  (p=0.000 n=10+10)
Template               21.4MB/s ± 0%  21.8MB/s ± 0%  +1.69%  (p=0.000 n=9+9)
GoParse                9.63MB/s ± 0%  9.71MB/s ± 0%  +0.84%  (p=0.000 n=9+8)
Revcomp                 377MB/s ± 0%   376MB/s ± 0%    ~     (p=0.399 n=9+9)
[Geo mean]             56.2MB/s       56.3MB/s       +0.20%

Change-Id: Ic915373f5ef512f9fbc45745860e5db7f6de6286
Reviewed-on: https://go-review.googlesource.com/97755
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/gc/asm_test.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/rewriteARM64.go

index da054dc8d1571455f5b4469d72a6e83bae5ed7a5..8ed0ca7517fec5d95ee8a1147b6674dd58775c1c 100644 (file)
@@ -3318,6 +3318,33 @@ var linuxARM64Tests = []*asmTest{
                pos: []string{"MOVD"},
                neg: []string{"MOVB", "MOVH", "MOVW"},
        },
+       {
+               fn: `
+               func $(b []byte, v uint16) {
+                       binary.BigEndian.PutUint16(b, v)
+               }
+               `,
+               pos: []string{"MOVH"},
+               neg: []string{"MOVB"},
+       },
+       {
+               fn: `
+               func $(b []byte, v uint32) {
+                       binary.BigEndian.PutUint32(b, v)
+               }
+               `,
+               pos: []string{"MOVW"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(b []byte, v uint64) {
+                       binary.BigEndian.PutUint64(b, v)
+               }
+               `,
+               pos: []string{"MOVD"},
+               neg: []string{"MOVB", "MOVH", "MOVW"},
+       },
 }
 
 var linuxMIPSTests = []*asmTest{
index 972a7f03de2e032041fd2fe60fc304e28426cd0a..7b31d46c5a97371ee04e84f008d5d1436ab038e7 100644 (file)
        && isSamePtr(ptr0, ptr1)
        && clobber(x)
        -> (MOVDstore [i-4] {s} ptr0 w0 mem)
+(MOVBstore [i] {s} ptr w
+       x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
+       x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
+       x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
+       x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
+       x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
+       x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
+       x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && x3.Uses == 1
+       && x4.Uses == 1
+       && x5.Uses == 1
+       && x6.Uses == 1
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       && clobber(x3)
+       && clobber(x4)
+       && clobber(x5)
+       && clobber(x6)
+       -> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
+(MOVBstore [i] {s} ptr w
+       x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w))
+       x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVWUreg w))
+       x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVWUreg w)) mem))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+(MOVBstore [i] {s} ptr w
+       x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
+       x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
+       x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
+       && x0.Uses == 1
+       && x1.Uses == 1
+       && x2.Uses == 1
+       && clobber(x0)
+       && clobber(x1)
+       && clobber(x2)
+       -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVHUreg w)) mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) mem))
+       && x.Uses == 1
+       && clobber(x)
+       -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
 
 // FP simplification
 (FNEGS (FMULS x y)) -> (FNMULS x y)
index e4369d64c6f4edf11b6123e572f809e634b66385..10a7a4cb052e05c833e73ae3a475ae9d1b4a33b8 100644 (file)
@@ -6125,6 +6125,8 @@ func rewriteValueARM64_OpARM64MOVBstore_0(v *Value) bool {
        return false
 }
 func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVHUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
        // cond: x.Uses == 1 && isSamePtr(ptr0, ptr1) && clobber(x)
        // result: (MOVHstore [i-1] {s} ptr0 w mem)
@@ -6323,6 +6325,548 @@ func rewriteValueARM64_OpARM64MOVBstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w) x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w) x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w) x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x1_1.AuxInt != 16 {
+                       break
+               }
+               if w != x1_1.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != i-3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 24 {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               x3 := x2.Args[2]
+               if x3.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[2]
+               if ptr != x3.Args[0] {
+                       break
+               }
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x3_1.AuxInt != 32 {
+                       break
+               }
+               if w != x3_1.Args[0] {
+                       break
+               }
+               x4 := x3.Args[2]
+               if x4.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x4.AuxInt != i-5 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[2]
+               if ptr != x4.Args[0] {
+                       break
+               }
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x4_1.AuxInt != 40 {
+                       break
+               }
+               if w != x4_1.Args[0] {
+                       break
+               }
+               x5 := x4.Args[2]
+               if x5.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x5.AuxInt != i-6 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[2]
+               if ptr != x5.Args[0] {
+                       break
+               }
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x5_1.AuxInt != 48 {
+                       break
+               }
+               if w != x5_1.Args[0] {
+                       break
+               }
+               x6 := x5.Args[2]
+               if x6.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x6.AuxInt != i-7 {
+                       break
+               }
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[2]
+               if ptr != x6.Args[0] {
+                       break
+               }
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x6_1.AuxInt != 56 {
+                       break
+               }
+               if w != x6_1.Args[0] {
+                       break
+               }
+               mem := x6.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstore)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVWUreg w)) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVWUreg w)) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               x0_1_0 := x0_1.Args[0]
+               if x0_1_0.Op != OpARM64MOVWUreg {
+                       break
+               }
+               if w != x0_1_0.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x1_1.AuxInt != 16 {
+                       break
+               }
+               x1_1_0 := x1_1.Args[0]
+               if x1_1_0.Op != OpARM64MOVWUreg {
+                       break
+               }
+               if w != x1_1_0.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != i-3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 24 {
+                       break
+               }
+               x2_1_0 := x2_1.Args[0]
+               if x2_1_0.Op != OpARM64MOVWUreg {
+                       break
+               }
+               if w != x2_1_0.Args[0] {
+                       break
+               }
+               mem := x2.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if ptr != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               x1 := x0.Args[2]
+               if x1.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               if ptr != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x1_1.AuxInt != 16 {
+                       break
+               }
+               if w != x1_1.Args[0] {
+                       break
+               }
+               x2 := x1.Args[2]
+               if x2.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != i-3 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if ptr != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 24 {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               mem := x2.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstore)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REVW, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x_1.AuxInt != 8 {
+                       break
+               }
+               if w != x_1.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVHUreg w)) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x_1.AuxInt != 8 {
+                       break
+               }
+               x_1_0 := x_1.Args[0]
+               if x_1_0.Op != OpARM64MOVHUreg {
+                       break
+               }
+               if w != x_1_0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               w := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpARM64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if ptr != x.Args[0] {
+                       break
+               }
+               x_1 := x.Args[1]
+               if x_1.Op != OpARM64SRLconst {
+                       break
+               }
+               if x_1.AuxInt != 8 {
+                       break
+               }
+               x_1_0 := x_1.Args[0]
+               if x_1_0.Op != OpARM64MOVWUreg {
+                       break
+               }
+               if w != x_1_0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpARM64REV16W, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {