From 653d56075dbe24239e4cc0a8e020e57383593035 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 24 Apr 2015 14:13:06 -0400 Subject: [PATCH] cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit I believe the benchmarks that get slower are under register pressure, and not making the call unconditionally makes the pressure worse, and the register allocator doesn't do a great job. But part of the point of this sequence is to get the write barriers out of the way so I can work on the register allocator, so that's okay. name old new delta BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~ BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~ BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~ BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00% BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88% BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84% BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26% BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37% BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~ BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54% BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~ BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~ BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~ BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~ BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~ BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~ BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23% BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~ BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~ BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27% BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65% BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17% BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55% BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38% BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~ BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~ BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~ BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30% BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31% BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~ Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2 Reviewed-on: https://go-review.googlesource.com/9224 Reviewed-by: Austin Clements --- src/cmd/internal/gc/builtin.go | 1 + src/cmd/internal/gc/builtin/runtime.go | 4 +++- src/cmd/internal/gc/cgen.go | 27 +++++++++++++++++++++----- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/cmd/internal/gc/builtin.go b/src/cmd/internal/gc/builtin.go index 6cf3a89671..6bdf78c8ab 100644 --- a/src/cmd/internal/gc/builtin.go +++ b/src/cmd/internal/gc/builtin.go @@ -87,6 +87,7 @@ const runtimeimport = "" + "func @\"\".chanrecv2 (@\"\".chanType·2 *byte, @\"\".hchan·3 <-chan any, @\"\".elem·4 *any) (? bool)\n" + "func @\"\".chansend1 (@\"\".chanType·1 *byte, @\"\".hchan·2 chan<- any, @\"\".elem·3 *any)\n" + "func @\"\".closechan (@\"\".hchan·1 any)\n" + + "var @\"\".writeBarrierEnabled bool\n" + "func @\"\".writebarrierptr (@\"\".dst·1 *any, @\"\".src·2 any)\n" + "func @\"\".writebarrierstring (@\"\".dst·1 *any, @\"\".src·2 any)\n" + "func @\"\".writebarrierslice (@\"\".dst·1 *any, @\"\".src·2 any)\n" + diff --git a/src/cmd/internal/gc/builtin/runtime.go b/src/cmd/internal/gc/builtin/runtime.go index ec769e1e99..179a4ddd9a 100644 --- a/src/cmd/internal/gc/builtin/runtime.go +++ b/src/cmd/internal/gc/builtin/runtime.go @@ -108,7 +108,8 @@ func chanrecv2(chanType *byte, hchan <-chan any, elem *any) bool func chansend1(chanType *byte, hchan chan<- any, elem *any) func closechan(hchan any) -// *byte is really *runtime.Type +var writeBarrierEnabled bool + func writebarrierptr(dst *any, src any) func writebarrierstring(dst *any, src any) func writebarrierslice(dst *any, src any) @@ -144,6 +145,7 @@ func writebarrierfat1101(dst *any, _ uintptr, src any) func writebarrierfat1110(dst *any, _ uintptr, src any) func writebarrierfat1111(dst *any, _ uintptr, src any) +// *byte is really *runtime.Type func typedmemmove(typ *byte, dst *any, src *any) func typedslicecopy(typ *byte, dst any, src any) int diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go index 427c671947..6c8f7b56a8 100644 --- a/src/cmd/internal/gc/cgen.go +++ b/src/cmd/internal/gc/cgen.go @@ -793,10 +793,24 @@ func cgen_wbptr(n, res *Node) { if Debug_wb > 0 { Warn("write barrier") } + var dst, src Node - Agenr(res, &dst, nil) - Cgenr(n, &src, nil) - p := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &dst, nil) + Igen(res, &dst, nil) + if n.Op == OREGISTER { + src = *n + Regrealloc(&src) + } else { + Cgenr(n, &src, nil) + } + + Thearch.Gins(Thearch.Optoas(OCMP, Types[TUINT8]), syslook("writeBarrierEnabled", 0), Nodintconst(0)) + pbr := Gbranch(Thearch.Optoas(ONE, Types[TUINT32]), nil, -1) + Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, &dst) + pjmp := Gbranch(obj.AJMP, nil, 0) + Patch(pbr, Pc) + var adst Node + Agenr(&dst, &adst, &dst) + p := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &adst, nil) a := &p.To a.Type = obj.TYPE_MEM a.Reg = int16(Thearch.REGSP) @@ -807,12 +821,15 @@ func cgen_wbptr(n, res *Node) { p2 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, nil) p2.To = p.To p2.To.Offset += int64(Widthptr) - Regfree(&dst) - Regfree(&src) + Regfree(&adst) if sys_wbptr == nil { sys_wbptr = writebarrierfn("writebarrierptr", Types[Tptr], Types[Tptr]) } Ginscall(sys_wbptr, 0) + Patch(pjmp, Pc) + + Regfree(&dst) + Regfree(&src) } func cgen_wbfat(n, res *Node) { -- 2.50.0