]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: improve store-to-load forwarding with compatible types
authorAlexander Musman <alexander.musman@gmail.com>
Tue, 1 Apr 2025 15:43:38 +0000 (18:43 +0300)
committerGopher Robot <gobot@golang.org>
Fri, 4 Apr 2025 15:25:47 +0000 (08:25 -0700)
Improve the compiler's store-to-load forwarding optimization by relaxing the
type comparison condition. Instead of requiring exact type equality (CMPeq),
we now use copyCompatibleType which allows forwarding between compatible
types where safe.

Fix several size comparison bugs in the nested store patterns. Previously,
we were comparing the size of the outer store with the load type,
rather than comparing with the size of the actual store being forwarded
from.

Skip OpConvert in dead store elimination to help get rid of dead stores such
as zeroing slices. OpConvert, like OpInlMark, doesn't really use the memory.

This optimization is particularly beneficial for code that creates slices with
computed pointers, such as the runtime's heapBitsSlice function, where
intermediate calculations were previously causing the compiler to miss
store-to-load forwarding opportunities.

Local sweet run result on an x86_64 laptop:

                       │  Orig.res   │              Hopt.res              │
                       │   sec/op    │   sec/op     vs base               │
BiogoIgor-8               5.303 ± 1%    5.322 ± 1%       ~ (p=0.190 n=10)
BiogoKrishna-8            7.894 ± 1%    7.828 ± 2%       ~ (p=0.190 n=10)
BleveIndexBatch100-8      2.257 ± 1%    2.248 ± 2%       ~ (p=0.529 n=10)
EtcdPut-8                30.12m ± 1%   30.03m ± 1%       ~ (p=0.796 n=10)
EtcdSTM-8                127.1m ± 1%   126.2m ± 0%  -0.74% (p=0.023 n=10)
GoBuildKubelet-8          52.21 ± 0%    52.05 ± 1%       ~ (p=0.063 n=10)
GoBuildKubeletLink-8      4.342 ± 1%    4.305 ± 0%  -0.85% (p=0.000 n=10)
GoBuildIstioctl-8         43.33 ± 0%    43.24 ± 0%  -0.22% (p=0.015 n=10)
GoBuildIstioctlLink-8     4.604 ± 1%    4.598 ± 0%       ~ (p=0.063 n=10)
GoBuildFrontend-8         15.33 ± 0%    15.29 ± 0%       ~ (p=0.143 n=10)
GoBuildFrontendLink-8    740.0m ± 1%   737.7m ± 1%       ~ (p=0.912 n=10)
GopherLuaKNucleotide-8    9.590 ± 1%    9.656 ± 1%       ~ (p=0.165 n=10)
MarkdownRenderXHTML-8    96.97m ± 1%   97.26m ± 2%       ~ (p=0.105 n=10)
Tile38QueryLoad-8        335.9µ ± 1%   335.6µ ± 1%       ~ (p=0.481 n=10)
geomean                   1.336         1.333       -0.22%

Change-Id: I031552623e6d5a3b1b5be8325e6314706e45534f
Reviewed-on: https://go-review.googlesource.com/c/go/+/662075
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Carlos Amedee <carlos@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/_gen/generic.rules
src/cmd/compile/internal/ssa/deadstore.go
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewritegeneric.go
test/codegen/stack.go

index 02e4290b9d6abf0227ef782cceb1a3214ac717a7..eb04d03e496f5eeb102675b37be9947cbd8551d5 100644 (file)
 // Load of store of same address, with compatibly typed value and same size
 (Load <t1> p1 (Store {t2} p2 x _))
        && isSamePtr(p1, p2)
-       && t1.Compare(x.Type) == types.CMPeq
+       && copyCompatibleType(t1, x.Type)
        && t1.Size() == t2.Size()
        => x
 (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 x _)))
        && isSamePtr(p1, p3)
-       && t1.Compare(x.Type) == types.CMPeq
-       && t1.Size() == t2.Size()
+       && copyCompatibleType(t1, x.Type)
+       && t1.Size() == t3.Size()
        && disjoint(p3, t3.Size(), p2, t2.Size())
        => x
 (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 x _))))
        && isSamePtr(p1, p4)
-       && t1.Compare(x.Type) == types.CMPeq
-       && t1.Size() == t2.Size()
+       && copyCompatibleType(t1, x.Type)
+       && t1.Size() == t4.Size()
        && disjoint(p4, t4.Size(), p2, t2.Size())
        && disjoint(p4, t4.Size(), p3, t3.Size())
        => x
 (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 x _)))))
        && isSamePtr(p1, p5)
-       && t1.Compare(x.Type) == types.CMPeq
-       && t1.Size() == t2.Size()
+       && copyCompatibleType(t1, x.Type)
+       && t1.Size() == t5.Size()
        && disjoint(p5, t5.Size(), p2, t2.Size())
        && disjoint(p5, t5.Size(), p3, t3.Size())
        && disjoint(p5, t5.Size(), p4, t4.Size())
index 29cf1e91e0f1a6526735090c7ef899ed98683b65..f8c69dc6984d787b3aea367bdb903e5a6ecb10fa 100644 (file)
@@ -55,7 +55,7 @@ func dse(f *Func) {
                                        }
                                        continue
                                }
-                               if v.Op == OpInlMark {
+                               if v.Op == OpInlMark || v.Op == OpConvert {
                                        // Not really a use of the memory. See #67957.
                                        continue
                                }
index dd09330717c67791fd5c40826279841fa9e7456e..ed79d5154631ae1b27baa1ef6658c0b485864f40 100644 (file)
@@ -246,6 +246,19 @@ func isPtr(t *types.Type) bool {
        return t.IsPtrShaped()
 }
 
+func copyCompatibleType(t1, t2 *types.Type) bool {
+       if t1.Size() != t2.Size() {
+               return false
+       }
+       if t1.IsInteger() {
+               return t2.IsInteger()
+       }
+       if isPtr(t1) {
+               return isPtr(t2)
+       }
+       return t1.Compare(t2) == types.CMPeq
+}
+
 // mergeSym merges two symbolic offsets. There is no real merging of
 // offsets, we just pick the non-nil one.
 func mergeSym(x, y Sym) Sym {
@@ -822,7 +835,18 @@ func isSamePtr(p1, p2 *Value) bool {
                return true
        }
        if p1.Op != p2.Op {
-               return false
+               for p1.Op == OpOffPtr && p1.AuxInt == 0 {
+                       p1 = p1.Args[0]
+               }
+               for p2.Op == OpOffPtr && p2.AuxInt == 0 {
+                       p2 = p2.Args[0]
+               }
+               if p1 == p2 {
+                       return true
+               }
+               if p1.Op != p2.Op {
+                       return false
+               }
        }
        switch p1.Op {
        case OpOffPtr:
index 6f3cd659ef5c4c467ab0adedee9155e3c1ef9454..4fdb22b868ef6425e910260591f7b42587125295 100644 (file)
@@ -13599,7 +13599,7 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
        config := b.Func.Config
        typ := &b.Func.Config.Types
        // match: (Load <t1> p1 (Store {t2} p2 x _))
-       // cond: isSamePtr(p1, p2) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size()
+       // cond: isSamePtr(p1, p2) && copyCompatibleType(t1, x.Type) && t1.Size() == t2.Size()
        // result: x
        for {
                t1 := v.Type
@@ -13610,14 +13610,14 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
                t2 := auxToType(v_1.Aux)
                x := v_1.Args[1]
                p2 := v_1.Args[0]
-               if !(isSamePtr(p1, p2) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size()) {
+               if !(isSamePtr(p1, p2) && copyCompatibleType(t1, x.Type) && t1.Size() == t2.Size()) {
                        break
                }
                v.copyOf(x)
                return true
        }
        // match: (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 x _)))
-       // cond: isSamePtr(p1, p3) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p3, t3.Size(), p2, t2.Size())
+       // cond: isSamePtr(p1, p3) && copyCompatibleType(t1, x.Type) && t1.Size() == t3.Size() && disjoint(p3, t3.Size(), p2, t2.Size())
        // result: x
        for {
                t1 := v.Type
@@ -13635,14 +13635,14 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
                t3 := auxToType(v_1_2.Aux)
                x := v_1_2.Args[1]
                p3 := v_1_2.Args[0]
-               if !(isSamePtr(p1, p3) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p3, t3.Size(), p2, t2.Size())) {
+               if !(isSamePtr(p1, p3) && copyCompatibleType(t1, x.Type) && t1.Size() == t3.Size() && disjoint(p3, t3.Size(), p2, t2.Size())) {
                        break
                }
                v.copyOf(x)
                return true
        }
        // match: (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 x _))))
-       // cond: isSamePtr(p1, p4) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p4, t4.Size(), p2, t2.Size()) && disjoint(p4, t4.Size(), p3, t3.Size())
+       // cond: isSamePtr(p1, p4) && copyCompatibleType(t1, x.Type) && t1.Size() == t4.Size() && disjoint(p4, t4.Size(), p2, t2.Size()) && disjoint(p4, t4.Size(), p3, t3.Size())
        // result: x
        for {
                t1 := v.Type
@@ -13667,14 +13667,14 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
                t4 := auxToType(v_1_2_2.Aux)
                x := v_1_2_2.Args[1]
                p4 := v_1_2_2.Args[0]
-               if !(isSamePtr(p1, p4) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p4, t4.Size(), p2, t2.Size()) && disjoint(p4, t4.Size(), p3, t3.Size())) {
+               if !(isSamePtr(p1, p4) && copyCompatibleType(t1, x.Type) && t1.Size() == t4.Size() && disjoint(p4, t4.Size(), p2, t2.Size()) && disjoint(p4, t4.Size(), p3, t3.Size())) {
                        break
                }
                v.copyOf(x)
                return true
        }
        // match: (Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 x _)))))
-       // cond: isSamePtr(p1, p5) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p5, t5.Size(), p2, t2.Size()) && disjoint(p5, t5.Size(), p3, t3.Size()) && disjoint(p5, t5.Size(), p4, t4.Size())
+       // cond: isSamePtr(p1, p5) && copyCompatibleType(t1, x.Type) && t1.Size() == t5.Size() && disjoint(p5, t5.Size(), p2, t2.Size()) && disjoint(p5, t5.Size(), p3, t3.Size()) && disjoint(p5, t5.Size(), p4, t4.Size())
        // result: x
        for {
                t1 := v.Type
@@ -13706,7 +13706,7 @@ func rewriteValuegeneric_OpLoad(v *Value) bool {
                t5 := auxToType(v_1_2_2_2.Aux)
                x := v_1_2_2_2.Args[1]
                p5 := v_1_2_2_2.Args[0]
-               if !(isSamePtr(p1, p5) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.Size() && disjoint(p5, t5.Size(), p2, t2.Size()) && disjoint(p5, t5.Size(), p3, t3.Size()) && disjoint(p5, t5.Size(), p4, t4.Size())) {
+               if !(isSamePtr(p1, p5) && copyCompatibleType(t1, x.Type) && t1.Size() == t5.Size() && disjoint(p5, t5.Size(), p2, t2.Size()) && disjoint(p5, t5.Size(), p3, t3.Size()) && disjoint(p5, t5.Size(), p4, t4.Size())) {
                        break
                }
                v.copyOf(x)
index 65c9868d67086d950efc7a97175ddcd684525f2c..4e45d68f3816bd416a1d93a3cea77c30ef8575ca 100644 (file)
@@ -6,7 +6,10 @@
 
 package codegen
 
-import "runtime"
+import (
+       "runtime"
+       "unsafe"
+)
 
 // This file contains code generation tests related to the use of the
 // stack.
@@ -128,6 +131,29 @@ func spillSlotReuse() {
        getp2()[nopInt()] = 0
 }
 
+// Check that no stack frame space is needed for simple slice initialization with underlying structure.
+type mySlice struct {
+       array unsafe.Pointer
+       len   int
+       cap   int
+}
+
+// amd64:"TEXT\t.*, [$]0-"
+func sliceInit(base uintptr) []uintptr {
+       const ptrSize = 8
+       size := uintptr(4096)
+       bitmapSize := size / ptrSize / 8
+       elements := int(bitmapSize / ptrSize)
+       var sl mySlice
+       sl = mySlice{
+               unsafe.Pointer(base + size - bitmapSize),
+               elements,
+               elements,
+       }
+       // amd64:-"POPQ",-"SP"
+       return *(*[]uintptr)(unsafe.Pointer(&sl))
+}
+
 //go:noinline
 func nopInt() int {
        return 0