From 74163c895a7c5d7ebafeaed1f4a0891d218e2704 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 21 Oct 2024 15:36:38 -0700 Subject: [PATCH] cmd/compile: use STP/LDP around morestack on arm64 The spill/restore code around morestack is almost never exectued, so we should make it as small as possible. Using 2-register loads/stores makes sense here. Also, the offsets from SP are pretty small so the offset almost always fits in the (smaller than a normal load/store) offset field of the instruction. Makes cmd/go 0.6% smaller. Change-Id: I8845283c1b269a259498153924428f6173bda293 Reviewed-on: https://go-review.googlesource.com/c/go/+/621556 LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Reviewed-by: Cherry Mui --- src/cmd/compile/internal/arm64/ssa.go | 77 ++++++++++++++++++++++++--- src/cmd/internal/obj/link.go | 9 ++++ test/codegen/spills.go | 31 +++++++++++ 3 files changed, 110 insertions(+), 7 deletions(-) create mode 100644 test/codegen/spills.go diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index e2c4873192..adcabb1b95 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -78,6 +78,48 @@ func storeByType(t *types.Type) obj.As { panic("bad store type") } +// loadByType2 returns an opcode that can load consecutive memory locations into 2 registers with type t. +// returns obj.AXXX if no such opcode exists. +func loadByType2(t *types.Type) obj.As { + if t.IsFloat() { + switch t.Size() { + case 4: + return arm64.AFLDPS + case 8: + return arm64.AFLDPD + } + } else { + switch t.Size() { + case 4: + return arm64.ALDPW + case 8: + return arm64.ALDP + } + } + return obj.AXXX +} + +// storeByType2 returns an opcode that can store registers with type t into 2 consecutive memory locations. +// returns obj.AXXX if no such opcode exists. +func storeByType2(t *types.Type) obj.As { + if t.IsFloat() { + switch t.Size() { + case 4: + return arm64.AFSTPS + case 8: + return arm64.AFSTPD + } + } else { + switch t.Size() { + case 4: + return arm64.ASTPW + case 8: + return arm64.ASTP + } + } + return obj.AXXX +} + // makeshift encodes a register shifted by a constant, used as an Offset in Prog. func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 { if s < 0 || s >= 64 { @@ -167,17 +209,38 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() ssagen.AddrAuto(&p.To, v) case ssa.OpArgIntReg, ssa.OpArgFloatReg: + ssagen.CheckArgReg(v) // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill // The loop only runs once. - for _, a := range v.Block.Func.RegArgs { - // Pass the spill/unspill information along to the assembler, offset by size of - // the saved LR slot. + args := v.Block.Func.RegArgs + if len(args) == 0 { + break + } + v.Block.Func.RegArgs = nil // prevent from running again + + for i := 0; i < len(args); i++ { + a := args[i] + // Offset by size of the saved LR slot. addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize) - s.FuncInfo().AddSpill( - obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)}) + // Look for double-register operations if we can. + if i < len(args)-1 { + b := args[i+1] + if a.Type.Size() == b.Type.Size() && + a.Type.IsFloat() == b.Type.IsFloat() && + b.Offset == a.Offset+a.Type.Size() { + ld := loadByType2(a.Type) + st := storeByType2(a.Type) + if ld != obj.AXXX && st != obj.AXXX { + s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Reg2: b.Reg, Addr: addr, Unspill: ld, Spill: st}) + i++ // b is done also, skip it. + continue + } + } + } + // Pass the spill/unspill information along to the assembler. + s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)}) } - v.Block.Func.RegArgs = nil - ssagen.CheckArgReg(v) + case ssa.OpARM64ADD, ssa.OpARM64SUB, ssa.OpARM64AND, diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index a3e4a0d309..462b9b2bd2 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -1100,6 +1100,7 @@ type Auto struct { type RegSpill struct { Addr Addr Reg int16 + Reg2 int16 // If not 0, a second register to spill at Addr+regSize. Only for some archs. Spill, Unspill As } @@ -1192,6 +1193,10 @@ func (fi *FuncInfo) SpillRegisterArgs(last *Prog, pa ProgAlloc) *Prog { spill.As = ra.Spill spill.From.Type = TYPE_REG spill.From.Reg = ra.Reg + if ra.Reg2 != 0 { + spill.From.Type = TYPE_REGREG + spill.From.Offset = int64(ra.Reg2) + } spill.To = ra.Addr last = spill } @@ -1208,6 +1213,10 @@ func (fi *FuncInfo) UnspillRegisterArgs(last *Prog, pa ProgAlloc) *Prog { unspill.From = ra.Addr unspill.To.Type = TYPE_REG unspill.To.Reg = ra.Reg + if ra.Reg2 != 0 { + unspill.To.Type = TYPE_REGREG + unspill.To.Offset = int64(ra.Reg2) + } last = unspill } return last diff --git a/test/codegen/spills.go b/test/codegen/spills.go new file mode 100644 index 0000000000..c8ac9859a4 --- /dev/null +++ b/test/codegen/spills.go @@ -0,0 +1,31 @@ +// asmcheck + +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +func i64(a, b int64) int64 { // arm64:`STP\s`,`LDP\s` + g() + return a + b +} + +func i32(a, b int32) int32 { // arm64:`STPW`,`LDPW` + g() + return a + b +} + +func f64(a, b float64) float64 { // arm64:`FSTPD`,`FLDPD` + g() + return a + b +} + +func f32(a, b float32) float32 { // arm64:`FSTPS`,`FLDPS` + g() + return a + b +} + +//go:noinline +func g() { +} -- 2.48.1