From 89a0af86b819ceb8095c686fc5109b40082bc8c5 Mon Sep 17 00:00:00 2001
From: "khr@golang.org"
Date: Mon, 9 Jun 2025 16:44:46 -0700
Subject: [PATCH] cmd/compile: allow ops to specify clobbering input registers

Same as clobbering fixed registers, but which register is clobbered
depends on which register was assigned to the input.

Add code similar to resultInArg0 processing that makes a register copy
before allowing the op to clobber the last available copy of a value.

(Will be used by subsequent CLs in this stack.)

Change-Id: I6bad88b2cb9ac3303d960ff0fb1611727292cfc4
Reviewed-on: https://go-review.googlesource.com/c/go/+/680335
Auto-Submit: Keith Randall
Reviewed-by: Keith Randall
Reviewed-by: Jorropo
Reviewed-by: Mark Freeman
LUCI-TryBot-Result: Go LUCI
---
 src/cmd/compile/internal/ssa/_gen/main.go | 12 ++++++-
 src/cmd/compile/internal/ssa/op.go        |  4 +++
 src/cmd/compile/internal/ssa/regalloc.go  | 39 ++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/main.go b/src/cmd/compile/internal/ssa/_gen/main.go
index f20603245b..55f7dd1a1a 100644
--- a/src/cmd/compile/internal/ssa/_gen/main.go
+++ b/src/cmd/compile/internal/ssa/_gen/main.go
@@ -87,6 +87,10 @@ type regInfo struct {
 	// clobbers encodes the set of registers that are overwritten by
 	// the instruction (other than the output registers).
 	clobbers regMask
+	// Instruction clobbers the register containing input 0.
+	clobbersArg0 bool
+	// Instruction clobbers the register containing input 1.
+	clobbersArg1 bool
 	// outputs[i] encodes the set of registers allowed for the i'th output.
 	outputs []regMask
 }
@@ -293,7 +297,7 @@ func genOp() {
 			fmt.Fprintf(w, "argLen: %d,\n", v.argLength)
 
 			if v.rematerializeable {
-				if v.reg.clobbers != 0 {
+				if v.reg.clobbers != 0 || v.reg.clobbersArg0 || v.reg.clobbersArg1 {
 					log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
 				}
 				if v.clobberFlags {
@@ -402,6 +406,12 @@ func genOp() {
 			if v.reg.clobbers > 0 {
 				fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
 			}
+			if v.reg.clobbersArg0 {
+				fmt.Fprintf(w, "clobbersArg0: true,\n")
+			}
+			if v.reg.clobbersArg1 {
+				fmt.Fprintf(w, "clobbersArg1: true,\n")
+			}
 
 			// reg outputs
 			s = s[:0]
diff --git a/src/cmd/compile/internal/ssa/op.go b/src/cmd/compile/internal/ssa/op.go
index 3bf5863360..d5c7394a26 100644
--- a/src/cmd/compile/internal/ssa/op.go
+++ b/src/cmd/compile/internal/ssa/op.go
@@ -70,6 +70,10 @@ type regInfo struct {
 	// clobbers encodes the set of registers that are overwritten by
 	// the instruction (other than the output registers).
 	clobbers regMask
+	// Instruction clobbers the register containing input 0.
+	clobbersArg0 bool
+	// Instruction clobbers the register containing input 1.
+	clobbersArg1 bool
 	// outputs is the same as inputs, but for the outputs of the instruction.
 	outputs []outputInfo
 }
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index 5be771571f..de49bb40c5 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -1686,8 +1686,38 @@ func (s *regAllocState) regalloc(f *Func) {
 					}
 				}
 			}
-
 		ok:
+			for i := 0; i < 2; i++ {
+				if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
+					continue
+				}
+				if !s.liveAfterCurrentInstruction(v.Args[i]) {
+					// arg is dead. We can clobber its register.
+					continue
+				}
+				if s.values[v.Args[i].ID].rematerializeable {
+					// We can rematerialize the input, don't worry about clobbering it.
+					continue
+				}
+				if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
+					// We have at least 2 copies of arg. We can afford to clobber one.
+					continue
+				}
+				// Possible new registers to copy into.
+				m := s.compatRegs(v.Args[i].Type) &^ s.used
+				if m == 0 {
+					// No free registers. In this case we'll just clobber the
+					// input and future uses of that input must use a restore.
+					// TODO(khr): We should really do this like allocReg does it,
+					// spilling the value with the most distant next use.
+					continue
+				}
+				// Copy input to a new clobberable register.
+				c := s.allocValToReg(v.Args[i], m, true, v.Pos)
+				s.copies[c] = false
+				args[i] = c
+			}
+
 			// Pick a temporary register if needed.
 			// It should be distinct from all the input registers, so we
 			// allocate it after all the input registers, but before
@@ -1709,6 +1739,13 @@ func (s *regAllocState) regalloc(f *Func) {
 				s.tmpused |= regMask(1) << tmpReg
 			}
 
+			if regspec.clobbersArg0 {
+				s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
+			}
+			if regspec.clobbersArg1 {
+				s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
+			}
+
 			// Now that all args are in regs, we're ready to issue the value itself.
 			// Before we pick a register for the output value, allow input registers
 			// to be deallocated. We do this here so that the output can use the
-- 
2.51.0