"cmd/internal/obj"
)
+// maxShadowRanges bounds the number of disjoint byte intervals tracked
+// per shadowed pointer, to avoid quadratic behavior.
+const maxShadowRanges = 64
+
// dse does dead-store elimination on the Function.
// Dead stores are those which are unconditionally followed by
// another store to the same location, with no intervening load.
defer f.retSparseMap(shadowed)
// localAddrs maps from a local variable (the Aux field of a LocalAddr value) to an instance of a LocalAddr value for that variable in the current block.
localAddrs := map[any]*Value{}
+
+	// shadowedRanges stores the actual range data. The 'shadowed' sparseMap
+	// stores a 1-based index into this slice.
+ var shadowedRanges []*shadowRanges
+
for _, b := range f.Blocks {
// Find all the stores in this block. Categorize their uses:
// loadUse contains stores which are used by a subsequent load.
// Walk backwards looking for dead stores. Keep track of shadowed addresses.
// A "shadowed address" is a pointer, offset, and size describing a memory region that
// is known to be written. We keep track of shadowed addresses in the shadowed map,
- // mapping the ID of the address to a shadowRange where future writes will happen.
+ // mapping the ID of the address to a shadowRanges where future writes will happen.
// Since we're walking backwards, writes to a shadowed region are useless,
// as they will be immediately overwritten.
shadowed.clear()
+ shadowedRanges = shadowedRanges[:0]
v := last
walkloop:
// Someone might be reading this memory state.
// Clear all shadowed addresses.
shadowed.clear()
+ shadowedRanges = shadowedRanges[:0]
}
if v.Op == OpStore || v.Op == OpZero {
ptr := v.Args[0]
ptr = la
}
}
- srNum, _ := shadowed.get(ptr.ID)
- sr := shadowRange(srNum)
- if sr.contains(off, off+sz) {
+ var si *shadowRanges
+ idx, ok := shadowed.get(ptr.ID)
+ if ok {
+ // The sparseMap stores a 1-based index, so we subtract 1.
+ si = shadowedRanges[idx-1]
+ }
+
+ if si != nil && si.contains(off, off+sz) {
// Modify the store/zero into a copy of the memory state,
// effectively eliding the store operation.
if v.Op == OpStore {
v.Op = OpCopy
} else {
// Extend shadowed region.
- shadowed.set(ptr.ID, int32(sr.merge(off, off+sz)))
+ if si == nil {
+ si = &shadowRanges{}
+ shadowedRanges = append(shadowedRanges, si)
+ // Store a 1-based index in the sparseMap.
+ shadowed.set(ptr.ID, int32(len(shadowedRanges)))
+ }
+ si.add(off, off+sz)
}
}
// walk to previous store
}
}
-// A shadowRange encodes a set of byte offsets [lo():hi()] from
-// a given pointer that will be written to later in the block.
-// A zero shadowRange encodes an empty shadowed range.
-type shadowRange int32
-
-func (sr shadowRange) lo() int64 {
- return int64(sr & 0xffff)
+// shadowRange is a single half-open byte interval [lo, hi) that will be
+// written later in the block.
+type shadowRange struct {
+ lo, hi uint16
}
-func (sr shadowRange) hi() int64 {
- return int64((sr >> 16) & 0xffff)
+// shadowRanges is an unordered collection of disjoint byte intervals that
+// will be written later in the block.
+type shadowRanges struct {
+ ranges []shadowRange
}
// contains reports whether [lo:hi] is completely within sr.
-func (sr shadowRange) contains(lo, hi int64) bool {
- return lo >= sr.lo() && hi <= sr.hi()
+func (sr *shadowRanges) contains(lo, hi int64) bool {
+ for _, r := range sr.ranges {
+ if lo >= int64(r.lo) && hi <= int64(r.hi) {
+ return true
+ }
+ }
+ return false
}
-// merge returns the union of sr and [lo:hi].
-// merge is allowed to return something smaller than the union.
-func (sr shadowRange) merge(lo, hi int64) shadowRange {
- if lo < 0 || hi > 0xffff {
- // Ignore offsets that are too large or small.
- return sr
- }
- if sr.lo() == sr.hi() {
- // Old range is empty - use new one.
- return shadowRange(lo + hi<<16)
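+// add records that the byte range [lo:hi] will be written, folding the new
+// interval into any existing ranges it overlaps or abuts.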
+func (sr *shadowRanges) add(lo, hi int64) {
+	// Drop ranges that don't fit in 16 bits, and stop adding once
+	// maxShadowRanges intervals are tracked. Dropping a range is safe:
+	// it only means some dead stores may go undetected. The cap avoids
+	// quadratic behavior on blocks with many disjoint writes.
+ if lo < 0 || hi > 0xffff || len(sr.ranges) >= maxShadowRanges {
+ return
}
- if hi < sr.lo() || lo > sr.hi() {
- // The two regions don't overlap or abut, so we would
- // have to keep track of multiple disjoint ranges.
- // Because we can only keep one, keep the larger one.
- if sr.hi()-sr.lo() >= hi-lo {
- return sr
+ nlo := lo
+ nhi := hi
+ out := sr.ranges[:0]
+
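+	// Filter sr.ranges in place: keep intervals disjoint from the new one,
+	// and grow [nlo:nhi] to absorb any interval it overlaps or abuts.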
+ for _, r := range sr.ranges {
+ if nhi < int64(r.lo) || nlo > int64(r.hi) {
+ out = append(out, r)
+ continue
+ }
+ if int64(r.lo) < nlo {
+ nlo = int64(r.lo)
+ }
+ if int64(r.hi) > nhi {
+ nhi = int64(r.hi)
}
- return shadowRange(lo + hi<<16)
}
- // Regions overlap or abut - compute the union.
- return shadowRange(min(lo, sr.lo()) + max(hi, sr.hi())<<16)
+ sr.ranges = append(out, shadowRange{uint16(nlo), uint16(nhi)})
}
// elimDeadAutosGeneric deletes autos that are never accessed. To achieve this
import (
"cmd/compile/internal/types"
"cmd/internal/src"
+ "fmt"
+ "sort"
"testing"
)
t.Errorf("dead store not removed")
}
}
+
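+// TestDeadStoreArrayGap checks that a Zero of a 5-element int64 array is
+// eliminated when later stores overwrite all 40 bytes, even though the stores
+// are issued out of order and temporarily leave a gap in the covered range.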
+func TestDeadStoreArrayGap(t *testing.T) {
+ c := testConfig(t)
+ ptr := c.config.Types.BytePtr
+ i64 := c.config.Types.Int64
+
+ typ := types.NewArray(i64, 5)
+ tmp := c.Temp(typ)
+
+ fun := c.Fun("entry",
+ Bloc("entry",
+ Valu("start", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil),
+
+ Valu("base", OpLocalAddr, ptr, 0, tmp, "sp", "start"),
+
+ Valu("p0", OpOffPtr, ptr, 0, nil, "base"),
+ Valu("p1", OpOffPtr, ptr, 8, nil, "base"),
+ Valu("p2", OpOffPtr, ptr, 16, nil, "base"),
+ Valu("p3", OpOffPtr, ptr, 24, nil, "base"),
+ Valu("p4", OpOffPtr, ptr, 32, nil, "base"),
+
+ Valu("one", OpConst64, i64, 1, nil),
+ Valu("seven", OpConst64, i64, 7, nil),
+ Valu("zero", OpConst64, i64, 0, nil),
+
+ Valu("mem0", OpZero, types.TypeMem, 40, typ, "base", "start"),
+
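+			// Overwrite all five elements, storing to element 2 (bytes 16-24) last.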
+ Valu("s0", OpStore, types.TypeMem, 0, i64, "p0", "one", "mem0"),
+ Valu("s1", OpStore, types.TypeMem, 0, i64, "p1", "seven", "s0"),
+ Valu("s2", OpStore, types.TypeMem, 0, i64, "p3", "one", "s1"),
+ Valu("s3", OpStore, types.TypeMem, 0, i64, "p4", "one", "s2"),
+ Valu("s4", OpStore, types.TypeMem, 0, i64, "p2", "zero", "s3"),
+
+ Goto("exit")),
+ Bloc("exit",
+ Exit("s4")))
+
+ CheckFunc(fun.f)
+ dse(fun.f)
+ CheckFunc(fun.f)
+
+ if op := fun.values["mem0"].Op; op != OpCopy {
+ t.Fatalf("dead Zero not removed: got %s, want OpCopy", op)
+ }
+}
+
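+// TestShadowRanges exercises shadowRanges.add and contains directly:
+// merging of overlapping and abutting intervals, disjoint intervals, and
+// out-of-range offsets.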
+func TestShadowRanges(t *testing.T) {
+ t.Run("simple insert & contains", func(t *testing.T) {
+ var sr shadowRanges
+ sr.add(10, 20)
+
+ wantRanges(t, sr.ranges, [][2]uint16{{10, 20}})
+ if !sr.contains(12, 18) || !sr.contains(10, 20) {
+ t.Fatalf("contains failed after simple add")
+ }
+ if sr.contains(9, 11) || sr.contains(11, 21) {
+ t.Fatalf("contains erroneously true for non-contained range")
+ }
+ })
+
+ t.Run("merge overlapping", func(t *testing.T) {
+ var sr shadowRanges
+ sr.add(10, 20)
+ sr.add(15, 25)
+
+ wantRanges(t, sr.ranges, [][2]uint16{{10, 25}})
+ if !sr.contains(13, 24) {
+ t.Fatalf("contains should be true after merge")
+ }
+ })
+
+ t.Run("merge touching boundary", func(t *testing.T) {
+ var sr shadowRanges
+ sr.add(100, 150)
+ // touches at 150 - should coalesce
+ sr.add(150, 180)
+
+ wantRanges(t, sr.ranges, [][2]uint16{{100, 180}})
+ })
+
+ t.Run("union across several ranges", func(t *testing.T) {
+ var sr shadowRanges
+ sr.add(10, 20)
+ sr.add(30, 40)
+ // bridges second, not first
+ sr.add(25, 35)
+
+ wantRanges(t, sr.ranges, [][2]uint16{{10, 20}, {25, 40}})
+
+ // envelops everything
+ sr.add(5, 50)
+ wantRanges(t, sr.ranges, [][2]uint16{{5, 50}})
+ })
+
+ t.Run("disjoint intervals stay separate", func(t *testing.T) {
+ var sr shadowRanges
+ sr.add(10, 20)
+ sr.add(22, 30)
+
+ wantRanges(t, sr.ranges, [][2]uint16{{10, 20}, {22, 30}})
+ // spans both
+ if sr.contains(15, 25) {
+ t.Fatalf("contains across two disjoint ranges should be false")
+ }
+ })
+
+ t.Run("large uint16 offsets still work", func(t *testing.T) {
+ var sr shadowRanges
+ sr.add(40000, 45000)
+
+ if !sr.contains(42000, 43000) {
+ t.Fatalf("contains failed for large uint16 values")
+ }
+ })
+
+ t.Run("out-of-bounds inserts ignored", func(t *testing.T) {
+ var sr shadowRanges
+ sr.add(10, 20)
+ sr.add(-5, 5)
+ sr.add(70000, 70010)
+
+ wantRanges(t, sr.ranges, [][2]uint16{{10, 20}})
+ })
+}
+
+// sortRanges sorts ranges by lo so that comparisons are order-independent.
+func sortRanges(r []shadowRange) {
+ sort.Slice(r, func(i, j int) bool { return r[i].lo < r[j].lo })
+}
+
+// wantRanges checks that got, ignoring order, matches the expected
+// (lo, hi) pairs.
+func wantRanges(t *testing.T, got []shadowRange, want [][2]uint16) {
+ t.Helper()
+ sortRanges(got)
+
+ if len(got) != len(want) {
+ t.Fatalf("len(ranges)=%d, want %d (got=%v)", len(got), len(want), got)
+ }
+
+ for i, w := range want {
+ if got[i].lo != w[0] || got[i].hi != w[1] {
+ t.Fatalf("range %d = [%d,%d], want [%d,%d] (full=%v)",
+ i, got[i].lo, got[i].hi, w[0], w[1], got)
+ }
+ }
+}
+
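+// BenchmarkDeadStore measures dse on a small straight-line block in which a
+// Zero and a store are shadowed by later stores to the same addresses.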
+func BenchmarkDeadStore(b *testing.B) {
+ cfg := testConfig(b)
+ ptr := cfg.config.Types.BytePtr
+
+ f := cfg.Fun("entry",
+ Bloc("entry",
+ Valu("start", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil),
+ Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil),
+ Valu("a1", OpAddr, ptr, 0, nil, "sb"),
+ Valu("a2", OpAddr, ptr, 0, nil, "sb"),
+ Valu("a3", OpAddr, ptr, 0, nil, "sb"),
+ Valu("z1", OpZero, types.TypeMem, 1, cfg.config.Types.Bool, "a3", "start"),
+ Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a1", "v", "z1"),
+ Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a2", "v", "s1"),
+ Valu("s3", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a1", "v", "s2"),
+ Valu("s4", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a3", "v", "s3"),
+ Goto("exit")),
+ Bloc("exit",
+ Exit("s3")))
+
+ runBench(b, func() {
+ dse(f.f)
+ })
+}
+
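+// BenchmarkDeadStorePhi measures dse on a loop whose store takes its memory
+// from a Phi, so no store can be eliminated.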
+func BenchmarkDeadStorePhi(b *testing.B) {
+ cfg := testConfig(b)
+ ptr := cfg.config.Types.BytePtr
+
+ f := cfg.Fun("entry",
+ Bloc("entry",
+ Valu("start", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil),
+ Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil),
+ Valu("addr", OpAddr, ptr, 0, nil, "sb"),
+ Goto("loop")),
+ Bloc("loop",
+ Valu("phi", OpPhi, types.TypeMem, 0, nil, "start", "store"),
+ Valu("store", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "addr", "v", "phi"),
+ If("v", "loop", "exit")),
+ Bloc("exit",
+ Exit("store")))
+
+ runBench(b, func() {
+ dse(f.f)
+ })
+}
+
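+// BenchmarkDeadStoreTypes measures dse (after cse) on stores made through two
+// addresses with distinct pointer types for the same symbol.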
+func BenchmarkDeadStoreTypes(b *testing.B) {
+ cfg := testConfig(b)
+
+ t1 := cfg.config.Types.UInt64.PtrTo()
+ t2 := cfg.config.Types.UInt32.PtrTo()
+
+ f := cfg.Fun("entry",
+ Bloc("entry",
+ Valu("start", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil),
+ Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil),
+ Valu("a1", OpAddr, t1, 0, nil, "sb"),
+ Valu("a2", OpAddr, t2, 0, nil, "sb"),
+ Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a1", "v", "start"),
+ Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a2", "v", "s1"),
+ Goto("exit")),
+ Bloc("exit",
+ Exit("s2")))
+ cse(f.f)
+
+ runBench(b, func() {
+ dse(f.f)
+ })
+}
+
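+// BenchmarkDeadStoreUnsafe measures dse (after cse) on an 8-byte store
+// followed by a 1-byte store to the same address; the narrow store must not
+// shadow the wide one.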
+func BenchmarkDeadStoreUnsafe(b *testing.B) {
+ cfg := testConfig(b)
+ ptr := cfg.config.Types.UInt64.PtrTo()
+ f := cfg.Fun("entry",
+ Bloc("entry",
+ Valu("start", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil),
+ Valu("v", OpConstBool, cfg.config.Types.Bool, 1, nil),
+ Valu("a1", OpAddr, ptr, 0, nil, "sb"),
+ Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Int64, "a1", "v", "start"),
+ Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Bool, "a1", "v", "s1"),
+ Goto("exit")),
+ Bloc("exit",
+ Exit("s2")))
+ cse(f.f)
+ runBench(b, func() {
+ dse(f.f)
+ })
+}
+
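+// BenchmarkDeadStoreSmallStructInit measures dse on a two-field struct temp
+// that is initialized twice through separate LocalAddr values, making the
+// first pair of stores dead.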
+func BenchmarkDeadStoreSmallStructInit(b *testing.B) {
+ cfg := testConfig(b)
+ ptr := cfg.config.Types.BytePtr
+
+ typ := types.NewStruct([]*types.Field{
+ types.NewField(src.NoXPos, &types.Sym{Name: "A"}, cfg.config.Types.Int),
+ types.NewField(src.NoXPos, &types.Sym{Name: "B"}, cfg.config.Types.Int),
+ })
+ tmp := cfg.Temp(typ)
+
+ f := cfg.Fun("entry",
+ Bloc("entry",
+ Valu("start", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sp", OpSP, cfg.config.Types.Uintptr, 0, nil),
+ Valu("zero", OpConst64, cfg.config.Types.Int, 0, nil),
+
+ Valu("v6", OpLocalAddr, ptr, 0, tmp, "sp", "start"),
+ Valu("v3", OpOffPtr, ptr, 8, nil, "v6"),
+ Valu("v22", OpOffPtr, ptr, 0, nil, "v6"),
+ Valu("s1", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v22", "zero", "start"),
+ Valu("s2", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v3", "zero", "s1"),
+
+ Valu("v8", OpLocalAddr, ptr, 0, tmp, "sp", "s2"),
+ Valu("v23", OpOffPtr, ptr, 8, nil, "v8"),
+ Valu("v25", OpOffPtr, ptr, 0, nil, "v8"),
+ Valu("s3", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v25", "zero", "s2"),
+ Valu("s4", OpStore, types.TypeMem, 0, cfg.config.Types.Int, "v23", "zero", "s3"),
+ Goto("exit")),
+ Bloc("exit",
+ Exit("s4")))
+ cse(f.f)
+
+ runBench(b, func() {
+ dse(f.f)
+ })
+}
+
+func BenchmarkDeadStoreLargeBlock(b *testing.B) {
+	// Build one very large block: addrCount base addresses, each written
+	// storesPerAddr times, so all but the last store to each address are dead.
+	const (
+		addrCount     = 128
+		storesPerAddr = 8
+ )
+ cfg := testConfig(b)
+ ptrType := cfg.config.Types.BytePtr
+ boolType := cfg.config.Types.Bool
+
+ items := []interface{}{
+ Valu("start", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sb", OpSB, cfg.config.Types.Uintptr, 0, nil),
+ Valu("v", OpConstBool, boolType, 1, nil),
+ }
+
+ for i := 0; i < addrCount; i++ {
+ items = append(items,
+ Valu(fmt.Sprintf("addr%d", i), OpAddr, ptrType, 0, nil, "sb"),
+ )
+ }
+
+ prev := "start"
+ for round := 0; round < storesPerAddr; round++ {
+ for i := 0; i < addrCount; i++ {
+ store := fmt.Sprintf("s_%03d_%d", i, round)
+ addr := fmt.Sprintf("addr%d", i)
+ items = append(items,
+ Valu(store, OpStore, types.TypeMem, 0, boolType, addr, "v", prev),
+ )
+ prev = store
+ }
+ }
+
+ items = append(items, Goto("exit"))
+ entryBlk := Bloc("entry", items...)
+ exitBlk := Bloc("exit", Exit(prev))
+
+ f := cfg.Fun("stress", entryBlk, exitBlk)
+
+ runBench(b, func() {
+ dse(f.f)
+ })
+}
+
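+// runBench reports allocations and invokes the given function b.N times.
+// dse rewrites the Func in place, so iterations after the first operate on
+// the already-optimized function.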
+func runBench(b *testing.B, fn func()) {
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		fn()
+	}
+}