From 91d07ac71ce90bd27ae67de48b85db642f4431b0 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhao Date: Fri, 11 Oct 2024 11:08:43 +0800 Subject: [PATCH] cmd/compile: inline constant sized memclrNoHeapPointers calls on loong64 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Tested that on loong64, the optimization effect is negative for constant size cases greater than 512. So only enable inlining for constant size cases less than 512. goos: linux goarch: loong64 pkg: runtime cpu: Loongson-3A6000 @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | MemclrKnownSize1 2.4070n ± 0% 0.4004n ± 0% -83.37% (p=0.000 n=20) MemclrKnownSize2 2.1365n ± 0% 0.4004n ± 0% -81.26% (p=0.000 n=20) MemclrKnownSize4 2.4445n ± 0% 0.4004n ± 0% -83.62% (p=0.000 n=20) MemclrKnownSize8 2.4200n ± 0% 0.4004n ± 0% -83.45% (p=0.000 n=20) MemclrKnownSize16 2.8030n ± 0% 0.8007n ± 0% -71.43% (p=0.000 n=20) MemclrKnownSize32 2.803n ± 0% 1.602n ± 0% -42.85% (p=0.000 n=20) MemclrKnownSize64 3.250n ± 0% 2.402n ± 0% -26.08% (p=0.000 n=20) MemclrKnownSize112 6.006n ± 0% 2.819n ± 0% -53.06% (p=0.000 n=20) MemclrKnownSize128 6.006n ± 0% 3.240n ± 0% -46.05% (p=0.000 n=20) MemclrKnownSize192 6.807n ± 0% 5.205n ± 0% -23.53% (p=0.000 n=20) MemclrKnownSize248 7.608n ± 0% 6.301n ± 0% -17.19% (p=0.000 n=20) MemclrKnownSize256 7.608n ± 0% 6.707n ± 0% -11.84% (p=0.000 n=20) MemclrKnownSize512 13.61n ± 0% 13.61n ± 0% ~ (p=0.374 n=20) MemclrKnownSize1024 26.43n ± 0% 26.43n ± 0% ~ (p=0.826 n=20) MemclrKnownSize4096 103.3n ± 0% 103.3n ± 0% ~ (p=1.000 n=20) MemclrKnownSize512KiB 26.29µ ± 0% 26.29µ ± 0% -0.00% (p=0.012 n=20) geomean 10.05n 5.006n -50.18% | bench.old | bench.new | | B/s | B/s vs base | MemclrKnownSize1 396.2Mi ± 0% 2381.9Mi ± 0% +501.21% (p=0.000 n=20) MemclrKnownSize2 892.8Mi ± 0% 4764.0Mi ± 0% +433.59% (p=0.000 n=20) MemclrKnownSize4 1.524Gi ± 0% 9.305Gi ± 0% +510.56% (p=0.000 n=20) MemclrKnownSize8 3.079Gi ± 0% 18.609Gi ± 0% +504.42% (p=0.000 n=20) MemclrKnownSize16 5.316Gi ± 0% 18.609Gi ± 0% +250.05% (p=0.000 n=20) MemclrKnownSize32 10.63Gi ± 0% 18.61Gi ± 0% +75.00% (p=0.000 n=20) MemclrKnownSize64 18.34Gi ± 0% 24.81Gi ± 0% +35.27% (p=0.000 n=20) MemclrKnownSize112 17.37Gi ± 0% 37.01Gi ± 0% +113.08% (p=0.000 n=20) MemclrKnownSize128 19.85Gi ± 0% 36.80Gi ± 0% +85.39% (p=0.000 n=20) MemclrKnownSize192 26.27Gi ± 0% 34.35Gi ± 0% +30.77% (p=0.000 n=20) MemclrKnownSize248 30.36Gi ± 0% 36.66Gi ± 0% +20.75% (p=0.000 n=20) MemclrKnownSize256 31.34Gi ± 0% 35.55Gi ± 0% +13.43% (p=0.000 n=20) MemclrKnownSize512 35.02Gi ± 0% 35.03Gi ± 0% +0.00% (p=0.030 n=20) MemclrKnownSize1024 36.09Gi ± 0% 36.09Gi ± 0% ~ (p=0.101 n=20) MemclrKnownSize4096 36.93Gi ± 0% 36.93Gi ± 0% +0.00% (p=0.003 n=20) MemclrKnownSize512KiB 18.57Gi ± 0% 18.57Gi ± 0% +0.00% (p=0.041 n=20) geomean 10.13Gi 20.33Gi +100.72% Change-Id: I460a56f7ccc9f820ca2c1934c1c517b9614809ac Reviewed-on: https://go-review.googlesource.com/c/go/+/621355 LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: abner chenc Reviewed-by: Michael Pratt --- src/cmd/compile/internal/ssa/rewrite.go | 2 +- test/codegen/slices.go | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 71b8f09daf..45eb48ad63 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1371,7 +1371,7 @@ func isInlinableMemclr(c *Config, sz int64) bool { switch c.arch { case "amd64", "arm64": return true - case "ppc64le", "ppc64": + case "ppc64le", "ppc64", "loong64": return sz < 512 } return false diff --git a/test/codegen/slices.go b/test/codegen/slices.go index a38fe77e3f..9e8990c586 100644 --- a/test/codegen/slices.go +++ b/test/codegen/slices.go @@ -47,6 +47,7 @@ func SliceExtensionConst(s []int) []int { // amd64:-`.*runtime\.makeslice` // amd64:-`.*runtime\.panicmakeslicelen` // amd64:"MOVUPS\tX15" + // loong64:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.makeslice` // ppc64x:-`.*runtime\.panicmakeslicelen` @@ -58,6 +59,7 @@ func SliceExtensionConstInt64(s []int) []int { // amd64:-`.*runtime\.makeslice` // amd64:-`.*runtime\.panicmakeslicelen` // amd64:"MOVUPS\tX15" + // loong64:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.makeslice` // ppc64x:-`.*runtime\.panicmakeslicelen` @@ -69,6 +71,7 @@ func SliceExtensionConstUint64(s []int) []int { // amd64:-`.*runtime\.makeslice` // amd64:-`.*runtime\.panicmakeslicelen` // amd64:"MOVUPS\tX15" + // loong64:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.makeslice` // ppc64x:-`.*runtime\.panicmakeslicelen` @@ -80,16 +83,18 @@ func SliceExtensionConstUint(s []int) []int { // amd64:-`.*runtime\.makeslice` // amd64:-`.*runtime\.panicmakeslicelen` // amd64:"MOVUPS\tX15" + // loong64:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.memclrNoHeapPointers` // ppc64x:-`.*runtime\.makeslice` // ppc64x:-`.*runtime\.panicmakeslicelen` return append(s, make([]int, uint(1<<2))...) } -// On ppc64x continue to use memclrNoHeapPointers +// On ppc64x and loong64 continue to use memclrNoHeapPointers // for sizes >= 512. func SliceExtensionConst512(s []int) []int { // amd64:-`.*runtime\.memclrNoHeapPointers` + // loong64:`.*runtime\.memclrNoHeapPointers` // ppc64x:`.*runtime\.memclrNoHeapPointers` return append(s, make([]int, 1<<9)...) } -- 2.48.1