The existing bulk/cached Prog allocator, Ctxt.NewProg, is not concurrency-safe.
This CL moves Prog allocation to its clients, the compiler and the assembler.
The assembler is so fast and generates so few Progs that it does not need
optimization of Prog allocation. I could not generate measureable changes.
And even if I could, the assembly is a miniscule portion of build times.
The compiler already has a natural place to manage Prog allocation;
this CL migrates the Prog cache there.
It will be made concurrency-safe in a later CL by
partitioning the Prog cache into chunks and assigning each chunk
to a different goroutine to manage.
This CL does cause a performance degradation when the compiler
is invoked with the -S flag (to dump assembly).
However, such usage is rare and almost always done manually.
The one instance I know of in a test is TestAssembly
in cmd/compile/internal/gc, and I did not detect
a measurable performance impact there.
Passes toolstash-check -all.
Minor compiler performance impact.
Updates #15756
Performance impact from just this CL:
name old time/op new time/op delta
Template 213ms ± 4% 213ms ± 4% ~ (p=0.571 n=49+49)
Unicode 89.1ms ± 3% 89.4ms ± 3% ~ (p=0.388 n=47+48)
GoTypes 581ms ± 2% 584ms ± 3% +0.56% (p=0.019 n=47+48)
SSA 6.48s ± 2% 6.53s ± 2% +0.84% (p=0.000 n=47+49)
Flate 128ms ± 4% 128ms ± 4% ~ (p=0.832 n=49+49)
GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.815 n=48+47)
Reflect 371ms ± 4% 371ms ± 3% ~ (p=0.617 n=50+47)
Tar 112ms ± 4% 112ms ± 3% ~ (p=0.724 n=49+49)
XML 208ms ± 3% 208ms ± 4% ~ (p=0.678 n=49+50)
[Geo mean] 284ms 285ms +0.18%
name old user-ns/op new user-ns/op delta
Template 251M ± 7% 252M ±11% ~ (p=0.704 n=49+50)
Unicode 107M ± 7% 108M ± 5% +1.25% (p=0.036 n=50+49)
GoTypes 738M ± 3% 740M ± 3% ~ (p=0.305 n=49+48)
SSA 8.83G ± 2% 8.86G ± 4% ~ (p=0.098 n=47+50)
Flate 146M ± 6% 147M ± 3% ~ (p=0.584 n=48+41)
GoParser 178M ± 6% 179M ± 5% +0.93% (p=0.036 n=49+48)
Reflect 441M ± 4% 446M ± 7% ~ (p=0.218 n=44+49)
Tar 126M ± 5% 126M ± 5% ~ (p=0.766 n=48+49)
XML 245M ± 5% 244M ± 4% ~ (p=0.359 n=50+50)
[Geo mean] 341M 342M +0.51%
Performance impact from this CL combined with its parent:
name old time/op new time/op delta
Template 213ms ± 3% 214ms ± 4% ~ (p=0.685 n=47+50)
Unicode 89.8ms ± 6% 90.5ms ± 6% ~ (p=0.055 n=50+50)
GoTypes 584ms ± 3% 585ms ± 2% ~ (p=0.710 n=49+47)
SSA 6.50s ± 2% 6.53s ± 2% +0.39% (p=0.011 n=46+50)
Flate 128ms ± 3% 128ms ± 4% ~ (p=0.855 n=47+49)
GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.666 n=49+49)
Reflect 371ms ± 3% 372ms ± 3% ~ (p=0.298 n=48+48)
Tar 112ms ± 5% 113ms ± 3% ~ (p=0.107 n=49+49)
XML 208ms ± 3% 208ms ± 2% ~ (p=0.881 n=50+49)
[Geo mean] 285ms 285ms +0.26%
name old user-ns/op new user-ns/op delta
Template 254M ± 9% 252M ± 8% ~ (p=0.290 n=49+50)
Unicode 106M ± 6% 108M ± 7% +1.44% (p=0.034 n=50+50)
GoTypes 741M ± 4% 743M ± 4% ~ (p=0.992 n=50+49)
SSA 8.86G ± 2% 8.83G ± 3% ~ (p=0.158 n=47+49)
Flate 147M ± 4% 148M ± 5% ~ (p=0.832 n=50+49)
GoParser 179M ± 5% 178M ± 5% ~ (p=0.370 n=48+50)
Reflect 441M ± 6% 445M ± 7% ~ (p=0.246 n=45+47)
Tar 126M ± 6% 126M ± 6% ~ (p=0.815 n=49+50)
XML 244M ± 3% 245M ± 4% ~ (p=0.190 n=50+50)
[Geo mean] 342M 342M +0.17%
Change-Id: I020f1c079d495fbe2e15ccb51e1ea2cc1b5a1855
Reviewed-on: https://go-review.googlesource.com/39634
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
t.Errorf(format, args...)
ok = false
}
- obj.FlushplistNoFree(ctxt, pList)
+ obj.Flushplist(ctxt, pList, nil)
for p := top; p != nil; p = p.Link {
if p.As == obj.ATEXT {
errBuf.WriteString(s)
}
pList.Firstpc, ok = parser.Parse()
- obj.Flushplist(ctxt, pList)
+ obj.Flushplist(ctxt, pList, nil)
if ok && !failed {
t.Errorf("asm: %s had no errors", goarch)
}
break
}
// reports errors to parser.Errorf
- obj.Flushplist(ctxt, pList)
+ obj.Flushplist(ctxt, pList, nil)
}
if ok {
obj.WriteObjFile(ctxt, buf)
"cmd/internal/src"
)
+var sharedProgArray *[10000]obj.Prog // *T instead of T to work around issue 19839
+
+func init() {
+ sharedProgArray = new([10000]obj.Prog)
+}
+
// Progs accumulates Progs for a function and converts them into machine code.
type Progs struct {
- Text *obj.Prog // ATEXT Prog for this function
- next *obj.Prog // next Prog
- pc int64 // virtual PC; count of Progs
- pos src.XPos // position to use for new Progs
- curfn *Node // fn these Progs are for
+ Text *obj.Prog // ATEXT Prog for this function
+ next *obj.Prog // next Prog
+ pc int64 // virtual PC; count of Progs
+ pos src.XPos // position to use for new Progs
+ curfn *Node // fn these Progs are for
+ progcache []obj.Prog // local progcache
+ cacheidx int // first free element of progcache
}
// newProgs returns a new Progs for fn.
func newProgs(fn *Node) *Progs {
pp := new(Progs)
+ if Ctxt.CanReuseProgs() {
+ pp.progcache = sharedProgArray[:]
+ }
pp.curfn = fn
// prime the pump
- pp.next = Ctxt.NewProg()
+ pp.next = pp.NewProg()
pp.clearp(pp.next)
pp.pos = fn.Pos
return pp
}
+func (pp *Progs) NewProg() *obj.Prog {
+ if pp.cacheidx < len(pp.progcache) {
+ p := &pp.progcache[pp.cacheidx]
+ p.Ctxt = Ctxt
+ pp.cacheidx++
+ return p
+ }
+ p := new(obj.Prog)
+ p.Ctxt = Ctxt
+ return p
+}
+
// Flush converts from pp to machine code.
func (pp *Progs) Flush() {
plist := &obj.Plist{Firstpc: pp.Text, Curfn: pp.curfn}
- obj.Flushplist(Ctxt, plist)
- // Clear pp to enable GC and avoid abuse.
+ obj.Flushplist(Ctxt, plist, pp.NewProg)
+}
+
+// Free clears pp and any associated resources.
+func (pp *Progs) Free() {
+ if Ctxt.CanReuseProgs() {
+ // Clear progs to enable GC and avoid abuse.
+ s := pp.progcache[:pp.cacheidx]
+ for i := range s {
+ s[i] = obj.Prog{}
+ }
+ }
+ // Clear pp to avoid abuse.
*pp = Progs{}
}
// Prog adds a Prog with instruction As to pp.
func (pp *Progs) Prog(as obj.As) *obj.Prog {
p := pp.next
- pp.next = Ctxt.NewProg()
+ pp.next = pp.NewProg()
pp.clearp(pp.next)
p.Link = pp.next
}
func (pp *Progs) Appendpp(p *obj.Prog, as obj.As, ftype obj.AddrType, freg int16, foffset int64, ttype obj.AddrType, treg int16, toffset int64) *obj.Prog {
- q := Ctxt.NewProg()
+ q := pp.NewProg()
pp.clearp(q)
q.As = as
q.Pos = p.Pos
pp := newProgs(fn)
genssa(ssafn, pp)
fieldtrack(pp.Text.From.Sym, fn.Func.FieldTrack)
- if pp.Text.To.Offset >= 1<<31 {
+ if pp.Text.To.Offset < 1<<31 {
+ pp.Flush()
+ } else {
largeStackFrames = append(largeStackFrames, fn.Pos)
- return
}
- pp.Flush()
+ pp.Free()
}
func debuginfo(fnsym *obj.LSym, curfn interface{}) []*dwarf.Var {
// state for writing objects
Text []*LSym
Data []*LSym
-
- // Cache of Progs
- allocIdx int
- progs [10000]Prog
}
func (ctxt *Link) Diag(format string, args ...interface{}) {
// It is used to provide access to cached/bulk-allocated Progs to the assemblers.
type ProgAlloc func() *Prog
-func Flushplist(ctxt *Link, plist *Plist) {
- flushplist(ctxt, plist, !ctxt.Debugasm)
-}
-func FlushplistNoFree(ctxt *Link, plist *Plist) {
- flushplist(ctxt, plist, false)
-}
-func flushplist(ctxt *Link, plist *Plist, freeProgs bool) {
+func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc) {
// Build list of symbols, and assign instructions to lists.
var curtext *LSym
var etext *Prog
etext = p
}
- newprog := ProgAlloc(ctxt.NewProg)
+ if newprog == nil {
+ newprog = ctxt.NewProg
+ }
// Add reference to Go arguments for C or assembly functions without them.
for _, s := range text {
ctxt.Arch.Assemble(ctxt, s, newprog)
linkpcln(ctxt, s)
makeFuncDebugEntry(ctxt, plist.Curfn, s)
- if freeProgs {
- s.Text = nil
- }
}
// Add to running list in ctxt.
ctxt.Text = append(ctxt.Text, text...)
- if freeProgs {
- ctxt.freeProgs()
- }
}
func (ctxt *Link) Globl(s *LSym, size int64, flag int) {
}
func (ctxt *Link) NewProg() *Prog {
- var p *Prog
- if i := ctxt.allocIdx; i < len(ctxt.progs) {
- p = &ctxt.progs[i]
- ctxt.allocIdx = i + 1
- } else {
- p = new(Prog) // should be the only call to this; all others should use ctxt.NewProg
- }
+ p := new(Prog)
p.Ctxt = ctxt
return p
}
-func (ctxt *Link) freeProgs() {
- s := ctxt.progs[:ctxt.allocIdx]
- for i := range s {
- s[i] = Prog{}
- }
- ctxt.allocIdx = 0
+
+func (ctxt *Link) CanReuseProgs() bool {
+ return !ctxt.Debugasm
}
func (ctxt *Link) Dconv(a *Addr) string {