]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: inline constant sized memclrNoHeapPointers calls on PPC64
authorLynn Boger <laboger@linux.vnet.ibm.com>
Tue, 31 Jan 2023 20:27:30 +0000 (14:27 -0600)
committerLynn Boger <laboger@linux.vnet.ibm.com>
Thu, 23 Feb 2023 18:57:27 +0000 (18:57 +0000)
Update the function isInlinableMemclr for ppc64 and ppc64le
to enable inlining for the constant sized cases < 512.

Larger cases can use dcbz which performs better but requires
alignment checking so it is best to continue using memclrNoHeapPointers
for those cases.

Results on p10:

MemclrKnownSize1         2.07ns ± 0%     0.57ns ± 0%   -72.59%
MemclrKnownSize2         2.56ns ± 5%     0.57ns ± 0%   -77.82%
MemclrKnownSize4         5.15ns ± 0%     0.57ns ± 0%   -89.00%
MemclrKnownSize8         2.23ns ± 0%     0.57ns ± 0%   -74.57%
MemclrKnownSize16        2.23ns ± 0%     0.50ns ± 0%   -77.74%
MemclrKnownSize32        2.28ns ± 0%     0.56ns ± 0%   -75.28%
MemclrKnownSize64        2.49ns ± 0%     0.72ns ± 0%   -70.95%
MemclrKnownSize112       2.97ns ± 2%     1.14ns ± 0%   -61.72%
MemclrKnownSize128       4.64ns ± 6%     2.45ns ± 1%   -47.17%
MemclrKnownSize192       5.45ns ± 5%     2.79ns ± 0%   -48.87%
MemclrKnownSize248       4.51ns ± 0%     2.83ns ± 0%   -37.12%
MemclrKnownSize256       6.34ns ± 1%     3.58ns ± 0%   -43.53%
MemclrKnownSize512       3.64ns ± 0%     3.64ns ± 0%    -0.03%
MemclrKnownSize1024      4.73ns ± 0%     4.73ns ± 0%    +0.01%
MemclrKnownSize4096      17.1ns ± 0%     17.1ns ± 0%    +0.07%
MemclrKnownSize512KiB    2.12µs ± 0%     2.12µs ± 0%      ~     (all equal)

Change-Id: If1abf5749f4802c64523a41fe0058bd144d0ea46
Reviewed-on: https://go-review.googlesource.com/c/go/+/464340
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Jakub Ciolek <jakub@ciolek.dev>
Reviewed-by: Archana Ravindar <aravind5@in.ibm.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Than McIntosh <thanm@google.com>
src/cmd/compile/internal/ssa/_gen/generic.rules
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewritegeneric.go
test/codegen/slices.go

index 10da571988e5f2ce256d3b26153d5c0b2eb8881b..d72824c4bff5e5095e53abf272e0a4adcbff6ced 100644 (file)
 // Turn known-size calls to memclrNoHeapPointers into a Zero.
 // Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details.
 (SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem))
-  && isInlinableMemclr(config)
+  && isInlinableMemclr(config, int64(c))
   && isSameCall(sym, "runtime.memclrNoHeapPointers")
   && call.Uses == 1
   && clobber(call)
index 0cf7917ec6e2336e6ab576f8c1946906b93832ca..c56447d336ada32e870adeec440468da022b6372 100644 (file)
@@ -1365,10 +1365,16 @@ func zeroUpper56Bits(x *Value, depth int) bool {
        return false
 }
 
-func isInlinableMemclr(c *Config) bool {
+func isInlinableMemclr(c *Config, sz int64) bool {
        // TODO: expand this check to allow other architectures
        // see CL 454255 and issue 56997
-       return c.arch == "amd64" || c.arch == "arm64"
+       switch c.arch {
+       case "amd64", "arm64":
+               return true
+       case "ppc64le", "ppc64":
+               return sz < 512
+       }
+       return false
 }
 
 // isInlinableMemmove reports whether the given arch performs a Move of the given size
index b81d0931190c1cdb5e4604fe24c91bf0dfed963e..49a721b5f2ade6c3fc9fea6131f3cba2766e8c7b 100644 (file)
@@ -26411,7 +26411,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
                return true
        }
        // match: (SelectN [0] call:(StaticCall {sym} sptr (Const64 [c]) mem))
-       // cond: isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
+       // cond: isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
        // result: (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem)
        for {
                if auxIntToInt64(v.AuxInt) != 0 {
@@ -26429,7 +26429,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
                        break
                }
                c := auxIntToInt64(call_1.AuxInt)
-               if !(isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
+               if !(isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
                        break
                }
                v.reset(OpZero)
@@ -26439,7 +26439,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
                return true
        }
        // match: (SelectN [0] call:(StaticCall {sym} sptr (Const32 [c]) mem))
-       // cond: isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
+       // cond: isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
        // result: (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem)
        for {
                if auxIntToInt64(v.AuxInt) != 0 {
@@ -26457,7 +26457,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
                        break
                }
                c := auxIntToInt32(call_1.AuxInt)
-               if !(isInlinableMemclr(config) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
+               if !(isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
                        break
                }
                v.reset(OpZero)
index e3be6bd76b1c0a615e7028595bd7c6e8894ae3eb..a38fe77e3f669d11b2bcfb7b12789045b44c5858 100644 (file)
@@ -47,7 +47,7 @@ func SliceExtensionConst(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
-       // ppc64x:`.*runtime\.memclrNoHeapPointers`
+       // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
        return append(s, make([]int, 1<<2)...)
@@ -58,7 +58,7 @@ func SliceExtensionConstInt64(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
-       // ppc64x:`.*runtime\.memclrNoHeapPointers`
+       // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
        return append(s, make([]int, int64(1<<2))...)
@@ -69,7 +69,7 @@ func SliceExtensionConstUint64(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
-       // ppc64x:`.*runtime\.memclrNoHeapPointers`
+       // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
        return append(s, make([]int, uint64(1<<2))...)
@@ -80,12 +80,20 @@ func SliceExtensionConstUint(s []int) []int {
        // amd64:-`.*runtime\.makeslice`
        // amd64:-`.*runtime\.panicmakeslicelen`
        // amd64:"MOVUPS\tX15"
-       // ppc64x:`.*runtime\.memclrNoHeapPointers`
+       // ppc64x:-`.*runtime\.memclrNoHeapPointers`
        // ppc64x:-`.*runtime\.makeslice`
        // ppc64x:-`.*runtime\.panicmakeslicelen`
        return append(s, make([]int, uint(1<<2))...)
 }
 
+// On ppc64x continue to use memclrNoHeapPointers
+// for sizes >= 512.
+func SliceExtensionConst512(s []int) []int {
+       // amd64:-`.*runtime\.memclrNoHeapPointers`
+       // ppc64x:`.*runtime\.memclrNoHeapPointers`
+       return append(s, make([]int, 1<<9)...)
+}
+
 func SliceExtensionPointer(s []*int, l int) []*int {
        // amd64:`.*runtime\.memclrHasPointers`
        // amd64:-`.*runtime\.makeslice`