]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile/internal/ssa: combine zero stores into larger stores on arm64
authorChad Rosier <mrosier.qdt@qualcommdatacenter.com>
Fri, 23 Feb 2018 20:17:54 +0000 (15:17 -0500)
committerCherry Zhang <cherryyz@google.com>
Tue, 27 Feb 2018 00:07:25 +0000 (00:07 +0000)
This reduces the go tool binary on arm64 by 12k.

go1 results on Amberwing:
name                   old time/op    new time/op    delta
RegexpMatchEasy0_32       249ns ± 0%     249ns ± 0%    ~     (p=0.087 n=10+10)
RegexpMatchEasy0_1K       584ns ± 0%     584ns ± 0%    ~     (all equal)
RegexpMatchEasy1_32       246ns ± 0%     246ns ± 0%    ~     (p=1.000 n=10+10)
RegexpMatchEasy1_1K       806ns ± 0%     806ns ± 0%    ~     (p=0.706 n=10+9)
RegexpMatchMedium_32      314ns ± 0%     314ns ± 0%    ~     (all equal)
RegexpMatchMedium_1K     52.1µs ± 0%    52.1µs ± 0%    ~     (p=0.245 n=10+8)
RegexpMatchHard_32       2.75µs ± 1%    2.75µs ± 1%    ~     (p=0.690 n=10+10)
RegexpMatchHard_1K       78.9µs ± 0%    78.9µs ± 1%    ~     (p=0.295 n=9+9)
FmtFprintfEmpty          58.5ns ± 0%    58.5ns ± 0%    ~     (all equal)
FmtFprintfString          112ns ± 0%     112ns ± 0%    ~     (all equal)
FmtFprintfInt             117ns ± 0%     116ns ± 0%  -0.85%  (p=0.000 n=10+10)
FmtFprintfIntInt          181ns ± 0%     181ns ± 0%    ~     (all equal)
FmtFprintfPrefixedInt     222ns ± 0%     224ns ± 0%  +0.90%  (p=0.000 n=9+10)
FmtFprintfFloat           318ns ± 1%     322ns ± 0%    ~     (p=0.059 n=10+8)
FmtManyArgs               736ns ± 1%     735ns ± 0%    ~     (p=0.206 n=9+9)
Gzip                      437ms ± 0%     436ms ± 0%  -0.25%  (p=0.000 n=10+10)
HTTPClientServer         89.8µs ± 1%    90.2µs ± 2%    ~     (p=0.393 n=10+10)
JSONEncode               20.1ms ± 1%    20.2ms ± 1%    ~     (p=0.065 n=9+10)
JSONDecode               94.2ms ± 1%    93.9ms ± 1%  -0.42%  (p=0.043 n=10+10)
GobDecode                12.7ms ± 1%    12.8ms ± 2%  +0.94%  (p=0.019 n=10+10)
GobEncode                12.1ms ± 0%    12.1ms ± 0%    ~     (p=0.052 n=10+10)
Mandelbrot200            5.06ms ± 0%    5.05ms ± 0%  -0.04%  (p=0.000 n=9+10)
TimeParse                 450ns ± 3%     446ns ± 0%    ~     (p=0.238 n=10+9)
TimeFormat                485ns ± 1%     483ns ± 1%    ~     (p=0.073 n=10+10)
Template                 90.4ms ± 0%    90.7ms ± 0%  +0.29%  (p=0.000 n=8+10)
GoParse                  6.01ms ± 0%    6.03ms ± 0%  +0.35%  (p=0.000 n=10+10)
BinaryTree17              11.7s ± 0%     11.7s ± 0%    ~     (p=0.481 n=10+10)
Revcomp                   669ms ± 0%     669ms ± 0%    ~     (p=0.315 n=10+10)
Fannkuch11                3.40s ± 0%     3.37s ± 0%  -0.92%  (p=0.000 n=10+10)
[Geo mean]               67.9µs         67.9µs       +0.02%

name                   old speed      new speed      delta
RegexpMatchEasy0_32     128MB/s ± 0%   128MB/s ± 0%  -0.08%  (p=0.003 n=8+10)
RegexpMatchEasy0_1K    1.75GB/s ± 0%  1.75GB/s ± 0%    ~     (p=0.642 n=8+10)
RegexpMatchEasy1_32     130MB/s ± 0%   130MB/s ± 0%    ~     (p=0.690 n=10+9)
RegexpMatchEasy1_1K    1.27GB/s ± 0%  1.27GB/s ± 0%    ~     (p=0.661 n=10+9)
RegexpMatchMedium_32   3.18MB/s ± 0%  3.18MB/s ± 0%    ~     (all equal)
RegexpMatchMedium_1K   19.7MB/s ± 0%  19.6MB/s ± 0%    ~     (p=0.190 n=10+9)
RegexpMatchHard_32     11.6MB/s ± 0%  11.6MB/s ± 1%    ~     (p=0.669 n=10+10)
RegexpMatchHard_1K     13.0MB/s ± 0%  13.0MB/s ± 0%    ~     (p=0.718 n=9+9)
Gzip                   44.4MB/s ± 0%  44.5MB/s ± 0%  +0.24%  (p=0.000 n=10+10)
JSONEncode             96.5MB/s ± 1%  96.1MB/s ± 1%    ~     (p=0.065 n=9+10)
JSONDecode             20.6MB/s ± 1%  20.7MB/s ± 1%  +0.42%  (p=0.041 n=10+10)
GobDecode              60.6MB/s ± 1%  60.0MB/s ± 2%  -0.92%  (p=0.016 n=10+10)
GobEncode              63.4MB/s ± 0%  63.6MB/s ± 0%    ~     (p=0.055 n=10+10)
Template               21.5MB/s ± 0%  21.4MB/s ± 0%  -0.30%  (p=0.000 n=9+10)
GoParse                9.64MB/s ± 0%  9.61MB/s ± 0%  -0.36%  (p=0.000 n=10+10)
Revcomp                 380MB/s ± 0%   380MB/s ± 0%    ~     (p=0.323 n=10+10)
[Geo mean]             56.0MB/s       55.9MB/s       -0.07%

Change-Id: Ia732fa57fbcf4767d72382516d9f16705d177736
Reviewed-on: https://go-review.googlesource.com/96435
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/gc/asm_test.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewriteARM64.go

index 6f34740239880dd92afb4867f76d7a07b03e5775..750ac75192019c0236bbb55bb2d28e2aea4438f0 100644 (file)
@@ -2971,6 +2971,232 @@ var linuxARM64Tests = []*asmTest{
                `,
                pos: []string{"\tCSEL\t"},
        },
+       // Check that zero stores are combine into larger stores
+       {
+               fn: `
+               func $(b []byte) {
+                       _ = b[1] // early bounds check to guarantee safety of writes below
+                       b[0] = 0
+                       b[1] = 0
+               }
+               `,
+               pos: []string{"MOVH\tZR"},
+               neg: []string{"MOVB"},
+       },
+       {
+               fn: `
+               func $(b []byte) {
+                       _ = b[1] // early bounds check to guarantee safety of writes below
+                       b[1] = 0
+                       b[0] = 0
+               }
+               `,
+               pos: []string{"MOVH\tZR"},
+               neg: []string{"MOVB"},
+       },
+       {
+               fn: `
+               func $(b []byte) {
+                       _ = b[3] // early bounds check to guarantee safety of writes below
+                       b[0] = 0
+                       b[1] = 0
+                       b[2] = 0
+                       b[3] = 0
+               }
+               `,
+               pos: []string{"MOVW\tZR"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(b []byte) {
+                       _ = b[3] // early bounds check to guarantee safety of writes below
+                       b[2] = 0
+                       b[3] = 0
+                       b[1] = 0
+                       b[0] = 0
+               }
+               `,
+               pos: []string{"MOVW\tZR"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(h []uint16) {
+                       _ = h[1] // early bounds check to guarantee safety of writes below
+                       h[0] = 0
+                       h[1] = 0
+               }
+               `,
+               pos: []string{"MOVW\tZR"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(h []uint16) {
+                       _ = h[1] // early bounds check to guarantee safety of writes below
+                       h[1] = 0
+                       h[0] = 0
+               }
+               `,
+               pos: []string{"MOVW\tZR"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(b []byte) {
+                       _ = b[7] // early bounds check to guarantee safety of writes below
+                       b[0] = 0
+                       b[1] = 0
+                       b[2] = 0
+                       b[3] = 0
+                       b[4] = 0
+                       b[5] = 0
+                       b[6] = 0
+                       b[7] = 0
+               }
+               `,
+               pos: []string{"MOVD\tZR"},
+               neg: []string{"MOVB", "MOVH", "MOVW"},
+       },
+       {
+               fn: `
+               func $(h []uint16) {
+                       _ = h[3] // early bounds check to guarantee safety of writes below
+                       h[0] = 0
+                       h[1] = 0
+                       h[2] = 0
+                       h[3] = 0
+               }
+               `,
+               pos: []string{"MOVD\tZR"},
+               neg: []string{"MOVB", "MOVH", "MOVW"},
+       },
+       {
+               fn: `
+               func $(h []uint16) {
+                       _ = h[3] // early bounds check to guarantee safety of writes below
+                       h[2] = 0
+                       h[3] = 0
+                       h[1] = 0
+                       h[0] = 0
+               }
+               `,
+               pos: []string{"MOVD\tZR"},
+               neg: []string{"MOVB", "MOVH", "MOVW"},
+       },
+       {
+               fn: `
+               func $(w []uint32) {
+                       _ = w[1] // early bounds check to guarantee safety of writes below
+                       w[0] = 0
+                       w[1] = 0
+               }
+               `,
+               pos: []string{"MOVD\tZR"},
+               neg: []string{"MOVB", "MOVH", "MOVW"},
+       },
+       {
+               fn: `
+               func $(w []uint32) {
+                       _ = w[1] // early bounds check to guarantee safety of writes below
+                       w[1] = 0
+                       w[0] = 0
+               }
+               `,
+               pos: []string{"MOVD\tZR"},
+               neg: []string{"MOVB", "MOVH", "MOVW"},
+       },
+       {
+               fn: `
+               func $(b []byte) {
+                       _ = b[15] // early bounds check to guarantee safety of writes below
+                       b[0] = 0
+                       b[1] = 0
+                       b[2] = 0
+                       b[3] = 0
+                       b[4] = 0
+                       b[5] = 0
+                       b[6] = 0
+                       b[7] = 0
+                       b[8] = 0
+                       b[9] = 0
+                       b[10] = 0
+                       b[11] = 0
+                       b[12] = 0
+                       b[13] = 0
+                       b[15] = 0
+                       b[14] = 0
+               }
+               `,
+               pos: []string{"STP"},
+               neg: []string{"MOVB", "MOVH", "MOVW"},
+       },
+       {
+               fn: `
+               func $(h []uint16) {
+                       _ = h[7] // early bounds check to guarantee safety of writes below
+                       h[0] = 0
+                       h[1] = 0
+                       h[2] = 0
+                       h[3] = 0
+                       h[4] = 0
+                       h[5] = 0
+                       h[6] = 0
+                       h[7] = 0
+               }
+               `,
+               pos: []string{"STP"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(w []uint32) {
+                       _ = w[3] // early bounds check to guarantee safety of writes below
+                       w[0] = 0
+                       w[1] = 0
+                       w[2] = 0
+                       w[3] = 0
+               }
+               `,
+               pos: []string{"STP"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(w []uint32) {
+                       _ = w[3] // early bounds check to guarantee safety of writes below
+                       w[1] = 0
+                       w[0] = 0
+                       w[3] = 0
+                       w[2] = 0
+               }
+               `,
+               pos: []string{"STP"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(d []uint64) {
+                       _ = d[1] // early bounds check to guarantee safety of writes below
+                       d[0] = 0
+                       d[1] = 0
+               }
+               `,
+               pos: []string{"STP"},
+               neg: []string{"MOVB", "MOVH"},
+       },
+       {
+               fn: `
+               func $(d []uint64) {
+                       _ = d[1] // early bounds check to guarantee safety of writes below
+                       d[1] = 0
+                       d[0] = 0
+               }
+               `,
+               pos: []string{"STP"},
+               neg: []string{"MOVB", "MOVH"},
+       },
 }
 
 var linuxMIPSTests = []*asmTest{
index c5774edbd34a40bfca38a2d2f98348764ef66137..9f6ef57d434b94134e5947b03f8e905483864926 100644 (file)
        && clobber(o4) && clobber(o5) && clobber(s0)
        -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
 
+// Combine zero stores into larger (unaligned) stores.
+(MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
+       && x.Uses == 1
+       && areAdjacentOffsets(i,j,1)
+       && is32Bit(min(i,j))
+       && isSamePtr(ptr0, ptr1)
+       && clobber(x)
+       -> (MOVHstorezero [min(i,j)] {s} ptr0 mem)
+(MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
+       && x.Uses == 1
+       && areAdjacentOffsets(i,j,2)
+       && is32Bit(min(i,j))
+       && isSamePtr(ptr0, ptr1)
+       && clobber(x)
+       -> (MOVWstorezero [min(i,j)] {s} ptr0 mem)
+(MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
+       && x.Uses == 1
+       && areAdjacentOffsets(i,j,4)
+       && is32Bit(min(i,j))
+       && isSamePtr(ptr0, ptr1)
+       && clobber(x)
+       -> (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+(MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
+       && x.Uses == 1
+       && areAdjacentOffsets(i,j,8)
+       && is32Bit(min(i,j))
+       && isSamePtr(ptr0, ptr1)
+       && clobber(x)
+       -> (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+
 // FP simplification
 (FNEGS (FMULS x y)) -> (FNMULS x y)
 (FNEGD (FMULD x y)) -> (FNMULD x y)
index 587a2a6d1a068bce7ca59ddef79b78eb963f9369..2a20519f038c2e25223a59a919b9213d9d7c4362 100644 (file)
@@ -769,6 +769,10 @@ func overlap(offset1, size1, offset2, size2 int64) bool {
        return false
 }
 
+func areAdjacentOffsets(off1, off2, size int64) bool {
+       return off1+size == off2 || off1 == off2+size
+}
+
 // check if value zeroes out upper 32-bit of 64-bit register.
 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
 // because it catches same amount of cases as 4.
index 81be85c63a2a7881f1b0ea2d4809d45348f4806a..1bb21d8a2c30f9b9f178d2ac4f8192a4f5ec385d 100644 (file)
@@ -5941,6 +5941,35 @@ func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVBstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVHstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
@@ -6205,6 +6234,35 @@ func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVDstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVQstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
@@ -6747,6 +6805,35 @@ func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVHstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVWstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
@@ -7379,6 +7466,35 @@ func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
+       // cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
+       // result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               ptr0 := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpARM64MOVWstorezero {
+                       break
+               }
+               j := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[1]
+               ptr1 := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
+                       break
+               }
+               v.reset(OpARM64MOVDstorezero)
+               v.AuxInt = min(i, j)
+               v.Aux = s
+               v.AddArg(ptr0)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {