]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: inline constant sized memclrNoHeapPointers calls on loong64
authorXiaolin Zhao <zhaoxiaolin@loongson.cn>
Fri, 11 Oct 2024 03:08:43 +0000 (11:08 +0800)
committerabner chenc <chenguoqi@loongson.cn>
Thu, 24 Oct 2024 08:55:31 +0000 (08:55 +0000)
Tested that on loong64, the optimization effect is negative for
constant size cases greater than 512.
So only enable inlining for constant size cases less than 512.

goos: linux
goarch: loong64
pkg: runtime
cpu: Loongson-3A6000 @ 2500.00MHz
                      |  bench.old   |              bench.new               |
                      |    sec/op    |    sec/op     vs base                |
MemclrKnownSize1        2.4070n ± 0%   0.4004n ± 0%  -83.37% (p=0.000 n=20)
MemclrKnownSize2        2.1365n ± 0%   0.4004n ± 0%  -81.26% (p=0.000 n=20)
MemclrKnownSize4        2.4445n ± 0%   0.4004n ± 0%  -83.62% (p=0.000 n=20)
MemclrKnownSize8        2.4200n ± 0%   0.4004n ± 0%  -83.45% (p=0.000 n=20)
MemclrKnownSize16       2.8030n ± 0%   0.8007n ± 0%  -71.43% (p=0.000 n=20)
MemclrKnownSize32        2.803n ± 0%    1.602n ± 0%  -42.85% (p=0.000 n=20)
MemclrKnownSize64        3.250n ± 0%    2.402n ± 0%  -26.08% (p=0.000 n=20)
MemclrKnownSize112       6.006n ± 0%    2.819n ± 0%  -53.06% (p=0.000 n=20)
MemclrKnownSize128       6.006n ± 0%    3.240n ± 0%  -46.05% (p=0.000 n=20)
MemclrKnownSize192       6.807n ± 0%    5.205n ± 0%  -23.53% (p=0.000 n=20)
MemclrKnownSize248       7.608n ± 0%    6.301n ± 0%  -17.19% (p=0.000 n=20)
MemclrKnownSize256       7.608n ± 0%    6.707n ± 0%  -11.84% (p=0.000 n=20)
MemclrKnownSize512       13.61n ± 0%    13.61n ± 0%        ~ (p=0.374 n=20)
MemclrKnownSize1024      26.43n ± 0%    26.43n ± 0%        ~ (p=0.826 n=20)
MemclrKnownSize4096      103.3n ± 0%    103.3n ± 0%        ~ (p=1.000 n=20)
MemclrKnownSize512KiB    26.29µ ± 0%    26.29µ ± 0%   -0.00% (p=0.012 n=20)
geomean                  10.05n         5.006n       -50.18%

                      |  bench.old   |               bench.new                |
                      |     B/s      |      B/s       vs base                 |
MemclrKnownSize1        396.2Mi ± 0%   2381.9Mi ± 0%  +501.21% (p=0.000 n=20)
MemclrKnownSize2        892.8Mi ± 0%   4764.0Mi ± 0%  +433.59% (p=0.000 n=20)
MemclrKnownSize4        1.524Gi ± 0%    9.305Gi ± 0%  +510.56% (p=0.000 n=20)
MemclrKnownSize8        3.079Gi ± 0%   18.609Gi ± 0%  +504.42% (p=0.000 n=20)
MemclrKnownSize16       5.316Gi ± 0%   18.609Gi ± 0%  +250.05% (p=0.000 n=20)
MemclrKnownSize32       10.63Gi ± 0%    18.61Gi ± 0%   +75.00% (p=0.000 n=20)
MemclrKnownSize64       18.34Gi ± 0%    24.81Gi ± 0%   +35.27% (p=0.000 n=20)
MemclrKnownSize112      17.37Gi ± 0%    37.01Gi ± 0%  +113.08% (p=0.000 n=20)
MemclrKnownSize128      19.85Gi ± 0%    36.80Gi ± 0%   +85.39% (p=0.000 n=20)
MemclrKnownSize192      26.27Gi ± 0%    34.35Gi ± 0%   +30.77% (p=0.000 n=20)
MemclrKnownSize248      30.36Gi ± 0%    36.66Gi ± 0%   +20.75% (p=0.000 n=20)
MemclrKnownSize256      31.34Gi ± 0%    35.55Gi ± 0%   +13.43% (p=0.000 n=20)
MemclrKnownSize512      35.02Gi ± 0%    35.03Gi ± 0%    +0.00% (p=0.030 n=20)
MemclrKnownSize1024     36.09Gi ± 0%    36.09Gi ± 0%         ~ (p=0.101 n=20)
MemclrKnownSize4096     36.93Gi ± 0%    36.93Gi ± 0%    +0.00% (p=0.003 n=20)
MemclrKnownSize512KiB   18.57Gi ± 0%    18.57Gi ± 0%    +0.00% (p=0.041 n=20)
geomean                 10.13Gi         20.33Gi       +100.72%

Change-Id: I460a56f7ccc9f820ca2c1934c1c517b9614809ac
Reviewed-on: https://go-review.googlesource.com/c/go/+/621355
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Michael Pratt <mpratt@google.com>
src/cmd/compile/internal/ssa/rewrite.go
test/codegen/slices.go

index 71b8f09daf3fdeddbaee0deb76bee745e3bb4a29..45eb48ad6367c6f8c7c333143f7bd084d176028b 100644 (file)
@@ -1371,7 +1371,7 @@ func isInlinableMemclr(c *Config, sz int64) bool {
        switch c.arch {
        case "amd64", "arm64":
                return true
-       case "ppc64le", "ppc64":
+       case "ppc64le", "ppc64", "loong64":
                return sz < 512
        }
        return false
index a38fe77e3f669d11b2bcfb7b12789045b44c5858..9e8990c586efa67191a493315224d9cc8eaf86dd 100644 (file)
@@ -47,6 +47,7 @@ func SliceExtensionConst(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
+       // loong64:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
@@ -58,6 +59,7 @@ func SliceExtensionConstInt64(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
+       // loong64:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
@@ -69,6 +71,7 @@ func SliceExtensionConstUint64(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
+       // loong64:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
@@ -80,16 +83,18 @@ func SliceExtensionConstUint(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
+       // loong64:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
        return append(s, make([]int, uint(1<<2))...)
 }
 
-// On ppc64x continue to use memclrNoHeapPointers
+// On ppc64x and loong64 continue to use memclrNoHeapPointers
 // for sizes >= 512.
 func SliceExtensionConst512(s []int) []int {
        // amd64:-`.*runtime\.memclrNoHeapPointers`
+       // loong64:`.*runtime\.memclrNoHeapPointers`
        // ppc64x:`.*runtime\.memclrNoHeapPointers`
        return append(s, make([]int, 1<<9)...)
 }