From: Russ Cox Date: Fri, 3 Apr 2015 16:23:28 +0000 (-0400) Subject: cmd/internal/gc: inline runtime.getg X-Git-Tag: go1.5beta1~1276 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=92c826b1b2473e743964d3478b73a9c39a579abf;p=gostls13.git cmd/internal/gc: inline runtime.getg This more closely restores what the old C runtime did. (In C, g was an 'extern register' with the same effective implementation as in this CL.) On a late 2012 MacBookPro10,2, best of 5 old vs best of 5 new: benchmark old ns/op new ns/op delta BenchmarkBinaryTree17 4981312777 4463426605 -10.40% BenchmarkFannkuch11 3046495712 3006819428 -1.30% BenchmarkFmtFprintfEmpty 89.3 79.8 -10.64% BenchmarkFmtFprintfString 284 262 -7.75% BenchmarkFmtFprintfInt 282 262 -7.09% BenchmarkFmtFprintfIntInt 480 448 -6.67% BenchmarkFmtFprintfPrefixedInt 382 358 -6.28% BenchmarkFmtFprintfFloat 529 486 -8.13% BenchmarkFmtManyArgs 1849 1773 -4.11% BenchmarkGobDecode 12835963 11794385 -8.11% BenchmarkGobEncode 10527170 10288422 -2.27% BenchmarkGzip 436109569 438422516 +0.53% BenchmarkGunzip 110121663 109843648 -0.25% BenchmarkHTTPClientServer 81930 85446 +4.29% BenchmarkJSONEncode 24638574 24280603 -1.45% BenchmarkJSONDecode 93022423 85753546 -7.81% BenchmarkMandelbrot200 4703899 4735407 +0.67% BenchmarkGoParse 5319853 5086843 -4.38% BenchmarkRegexpMatchEasy0_32 151 151 +0.00% BenchmarkRegexpMatchEasy0_1K 452 453 +0.22% BenchmarkRegexpMatchEasy1_32 131 132 +0.76% BenchmarkRegexpMatchEasy1_1K 761 722 -5.12% BenchmarkRegexpMatchMedium_32 228 224 -1.75% BenchmarkRegexpMatchMedium_1K 63751 64296 +0.85% BenchmarkRegexpMatchHard_32 3188 3238 +1.57% BenchmarkRegexpMatchHard_1K 95396 96756 +1.43% BenchmarkRevcomp 661587262 687107364 +3.86% BenchmarkTemplate 108312598 104008540 -3.97% BenchmarkTimeParse 453 459 +1.32% BenchmarkTimeFormat 475 441 -7.16% The garbage benchmark from the benchmarks subrepo gets 2.6% faster as well. Change-Id: I320aeda332db81012688b26ffab23f6581c59cfa Reviewed-on: https://go-review.googlesource.com/8460 Reviewed-by: Rick Hudson Run-TryBot: Rick Hudson Reviewed-by: Austin Clements --- diff --git a/src/cmd/5g/galign.go b/src/cmd/5g/galign.go index 1b349e105c..0a6c6552bf 100644 --- a/src/cmd/5g/galign.go +++ b/src/cmd/5g/galign.go @@ -64,6 +64,7 @@ func main() { gc.Thearch.Defframe = defframe gc.Thearch.Excise = excise gc.Thearch.Expandchecks = expandchecks + gc.Thearch.Getg = getg gc.Thearch.Gins = gins gc.Thearch.Ginscon = ginscon gc.Thearch.Ginsnop = ginsnop diff --git a/src/cmd/5g/ggen.go b/src/cmd/5g/ggen.go index 753c6e0092..edad7af32d 100644 --- a/src/cmd/5g/ggen.go +++ b/src/cmd/5g/ggen.go @@ -494,3 +494,10 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool { } return false } + +// res = runtime.getg() +func getg(res *gc.Node) { + var n1 gc.Node + gc.Nodreg(&n1, res.Type, arm.REGG) + gmove(&n1, res) +} diff --git a/src/cmd/6g/galign.go b/src/cmd/6g/galign.go index a73ddc6475..74be60e5ee 100644 --- a/src/cmd/6g/galign.go +++ b/src/cmd/6g/galign.go @@ -96,6 +96,7 @@ func main() { gc.Thearch.Dodiv = dodiv gc.Thearch.Excise = excise gc.Thearch.Expandchecks = expandchecks + gc.Thearch.Getg = getg gc.Thearch.Gins = gins gc.Thearch.Ginscon = ginscon gc.Thearch.Ginsnop = ginsnop diff --git a/src/cmd/6g/ggen.go b/src/cmd/6g/ggen.go index e609d0e60f..ceeec251c1 100644 --- a/src/cmd/6g/ggen.go +++ b/src/cmd/6g/ggen.go @@ -671,3 +671,20 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool { } return false } + +// res = runtime.getg() +func getg(res *gc.Node) { + var n1 gc.Node + gc.Regalloc(&n1, res.Type, res) + mov := optoas(gc.OAS, gc.Types[gc.Tptr]) + p := gins(mov, nil, &n1) + p.From.Type = obj.TYPE_REG + p.From.Reg = x86.REG_TLS + p = gins(mov, nil, &n1) + p.From = p.To + p.From.Type = obj.TYPE_MEM + p.From.Index = x86.REG_TLS + p.From.Scale = 1 + gmove(&n1, res) + gc.Regfree(&n1) +} diff --git a/src/cmd/7g/galign.go b/src/cmd/7g/galign.go index 1c50c21aa6..36b54aa935 100644 --- a/src/cmd/7g/galign.go +++ b/src/cmd/7g/galign.go @@ -62,6 +62,7 @@ func main() { gc.Thearch.Dodiv = dodiv gc.Thearch.Excise = excise gc.Thearch.Expandchecks = expandchecks + gc.Thearch.Getg = getg gc.Thearch.Gins = gins gc.Thearch.Ginscon = ginscon gc.Thearch.Ginsnop = ginsnop diff --git a/src/cmd/7g/ggen.go b/src/cmd/7g/ggen.go index 0fc5854315..94685d7fce 100644 --- a/src/cmd/7g/ggen.go +++ b/src/cmd/7g/ggen.go @@ -532,3 +532,10 @@ func expandchecks(firstp *obj.Prog) { p2.To.Offset = 0 } } + +// res = runtime.getg() +func getg(res *gc.Node) { + var n1 gc.Node + gc.Nodreg(&n1, res.Type, arm64.REGG) + gmove(&n1, res) +} diff --git a/src/cmd/8g/galign.go b/src/cmd/8g/galign.go index 1c03df5907..2a8e0b7aa9 100644 --- a/src/cmd/8g/galign.go +++ b/src/cmd/8g/galign.go @@ -77,6 +77,7 @@ func main() { gc.Thearch.Dodiv = cgen_div gc.Thearch.Excise = excise gc.Thearch.Expandchecks = expandchecks + gc.Thearch.Getg = getg gc.Thearch.Gins = gins gc.Thearch.Ginscon = ginscon gc.Thearch.Ginsnop = ginsnop diff --git a/src/cmd/8g/ggen.go b/src/cmd/8g/ggen.go index 115c962a00..9a551b0cac 100644 --- a/src/cmd/8g/ggen.go +++ b/src/cmd/8g/ggen.go @@ -944,3 +944,20 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool { } return false } + +// res = runtime.getg() +func getg(res *gc.Node) { + var n1 gc.Node + gc.Regalloc(&n1, res.Type, res) + mov := optoas(gc.OAS, gc.Types[gc.Tptr]) + p := gins(mov, nil, &n1) + p.From.Type = obj.TYPE_REG + p.From.Reg = x86.REG_TLS + p = gins(mov, nil, &n1) + p.From = p.To + p.From.Type = obj.TYPE_MEM + p.From.Index = x86.REG_TLS + p.From.Scale = 1 + gmove(&n1, res) + gc.Regfree(&n1) +} diff --git a/src/cmd/9g/galign.go b/src/cmd/9g/galign.go index b9e6c32ebb..68eab5852c 100644 --- a/src/cmd/9g/galign.go +++ b/src/cmd/9g/galign.go @@ -71,6 +71,7 @@ func main() { gc.Thearch.Dodiv = dodiv gc.Thearch.Excise = excise gc.Thearch.Expandchecks = expandchecks + gc.Thearch.Getg = getg gc.Thearch.Gins = gins gc.Thearch.Ginscon = ginscon gc.Thearch.Ginsnop = ginsnop diff --git a/src/cmd/9g/ggen.go b/src/cmd/9g/ggen.go index a009186ecb..9af36a158b 100644 --- a/src/cmd/9g/ggen.go +++ b/src/cmd/9g/ggen.go @@ -549,3 +549,10 @@ func expandchecks(firstp *obj.Prog) { p2.To.Offset = 0 } } + +// res = runtime.getg() +func getg(res *gc.Node) { + var n1 gc.Node + gc.Nodreg(&n1, res.Type, ppc64.REGG) + gmove(&n1, res) +} diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go index 3b628ac243..886a2d12f4 100644 --- a/src/cmd/internal/gc/cgen.go +++ b/src/cmd/internal/gc/cgen.go @@ -418,6 +418,10 @@ func Cgen(n *Node, res *Node) { Regfree(&n1) return + case OGETG: + Thearch.Getg(res) + return + // symmetric binary case OAND, OOR, diff --git a/src/cmd/internal/gc/fmt.go b/src/cmd/internal/gc/fmt.go index 38e358a735..589f20cff4 100644 --- a/src/cmd/internal/gc/fmt.go +++ b/src/cmd/internal/gc/fmt.go @@ -976,6 +976,7 @@ var opprec = []int{ OCONV: 8, OCOPY: 8, ODELETE: 8, + OGETG: 8, OLEN: 8, OLITERAL: 8, OMAKESLICE: 8, @@ -1363,7 +1364,7 @@ func exprfmt(n *Node, prec int) string { } return fmt.Sprintf("%v(%v)", Oconv(int(n.Op), obj.FmtSharp), Hconv(n.List, obj.FmtComma)) - case OCALL, OCALLFUNC, OCALLINTER, OCALLMETH: + case OCALL, OCALLFUNC, OCALLINTER, OCALLMETH, OGETG: var f string f += exprfmt(n.Left, nprec) if n.Isddd { diff --git a/src/cmd/internal/gc/gen.go b/src/cmd/internal/gc/gen.go index e52ff65845..4b7344a495 100644 --- a/src/cmd/internal/gc/gen.go +++ b/src/cmd/internal/gc/gen.go @@ -1002,6 +1002,10 @@ func gen(n *Node) { case ORETURN, ORETJMP: cgen_ret(n) + // Function calls turned into compiler intrinsics. + // At top level, can just ignore the call and make sure to preserve side effects in the argument, if any. + case OGETG: + // nothing case OSQRT: cgen_discard(n.Left) diff --git a/src/cmd/internal/gc/go.go b/src/cmd/internal/gc/go.go index 027ad28fb1..a6faaa5f91 100644 --- a/src/cmd/internal/gc/go.go +++ b/src/cmd/internal/gc/go.go @@ -798,6 +798,7 @@ type Arch struct { Dodiv func(int, *Node, *Node, *Node) Excise func(*Flow) Expandchecks func(*obj.Prog) + Getg func(*Node) Gins func(int, *Node, *Node) *obj.Prog Ginscon func(int, int64, *Node) Ginsnop func() diff --git a/src/cmd/internal/gc/syntax.go b/src/cmd/internal/gc/syntax.go index 736c7afda6..11cdf298f7 100644 --- a/src/cmd/internal/gc/syntax.go +++ b/src/cmd/internal/gc/syntax.go @@ -304,6 +304,7 @@ const ( ORETJMP // return to other function OPS // compare parity set (for x86 NaN check) OSQRT // sqrt(float64), on systems that have hw support + OGETG // runtime.getg() (read g pointer) OEND ) diff --git a/src/cmd/internal/gc/typecheck.go b/src/cmd/internal/gc/typecheck.go index 43991648f8..08262b15d4 100644 --- a/src/cmd/internal/gc/typecheck.go +++ b/src/cmd/internal/gc/typecheck.go @@ -1366,6 +1366,17 @@ OpSwitch: t = t.Type } n.Type = t + + if n.Op == OCALLFUNC && n.Left.Op == ONAME && (compiling_runtime != 0 || n.Left.Sym.Pkg == Runtimepkg) && n.Left.Sym.Name == "getg" { + // Emit code for runtime.getg() directly instead of calling function. + // Most such rewrites (for example the similar one for math.Sqrt) should be done in walk, + // so that the ordering pass can make sure to preserve the semantics of the original code + // (in particular, the exact time of the function call) by introducing temporaries. + // In this case, we know getg() always returns the same result within a given function + // and we want to avoid the temporaries, so we do the rewrite earlier than is typical. + n.Op = OGETG + } + break OpSwitch } @@ -1376,6 +1387,7 @@ OpSwitch: } n.Type = getoutargx(l.Type) + break OpSwitch case OCAP, OLEN, OREAL, OIMAG: diff --git a/src/cmd/internal/gc/walk.go b/src/cmd/internal/gc/walk.go index bf91116962..1012aa0538 100644 --- a/src/cmd/internal/gc/walk.go +++ b/src/cmd/internal/gc/walk.go @@ -179,7 +179,8 @@ func walkstmt(np **Node) { OPRINTN, OPANIC, OEMPTY, - ORECOVER: + ORECOVER, + OGETG: if n.Typecheck == 0 { Fatal("missing typecheck: %v", Nconv(n, obj.FmtSign)) } @@ -424,7 +425,8 @@ func walkexpr(np **Node, init **NodeList) { ONONAME, OINDREG, OEMPTY, - OPARAM: + OPARAM, + OGETG: goto ret case ONOT, diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index bee8b29694..f2222d03b0 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -1639,12 +1639,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0 // traceback from goexit1 must hit code range of goexit BYTE $0x90 // NOP -TEXT runtime·getg(SB),NOSPLIT,$0-4 - get_tls(CX) - MOVL g(CX), AX - MOVL AX, ret+0(FP) - RET - TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 MOVL addr+0(FP), AX PREFETCHT0 (AX) diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 946e151110..0e5389fbd7 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -1673,12 +1673,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0 // traceback from goexit1 must hit code range of goexit BYTE $0x90 // NOP -TEXT runtime·getg(SB),NOSPLIT,$0-8 - get_tls(CX) - MOVQ g(CX), AX - MOVQ AX, ret+0(FP) - RET - TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 MOVQ addr+0(FP), AX PREFETCHT0 (AX) diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s index e144c4071f..23e2cb9662 100644 --- a/src/runtime/asm_amd64p32.s +++ b/src/runtime/asm_amd64p32.s @@ -1096,12 +1096,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0 // traceback from goexit1 must hit code range of goexit BYTE $0x90 // NOP -TEXT runtime·getg(SB),NOSPLIT,$0-4 - get_tls(CX) - MOVL g(CX), AX - MOVL AX, ret+0(FP) - RET - TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 MOVL addr+0(FP), AX PREFETCHT0 (AX) diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s index a2e1e4be76..b7042ea26b 100644 --- a/src/runtime/asm_arm.s +++ b/src/runtime/asm_arm.s @@ -984,10 +984,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-4-0 // traceback from goexit1 must hit code range of goexit MOVW R0, R0 // NOP -TEXT runtime·getg(SB),NOSPLIT,$-4-4 - MOVW g, ret+0(FP) - RET - TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4 RET diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index 3c09d53a11..0b21a1da2f 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -903,10 +903,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0 MOVD R0, R0 // NOP BL runtime·goexit1(SB) // does not return -TEXT runtime·getg(SB),NOSPLIT,$-8-8 - MOVD g, ret+0(FP) - RET - // TODO(aram): use PRFM here. TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 RET diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index ef6405003b..5b7ad41df0 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -1134,10 +1134,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0 // traceback from goexit1 must hit code range of goexit MOVD R0, R0 // NOP -TEXT runtime·getg(SB),NOSPLIT,$-8-8 - MOVD g, ret+0(FP) - RETURN - TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8 RETURN diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index 99d8dd45e2..7b6fbb0349 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -18,6 +18,9 @@ func add(p unsafe.Pointer, x uintptr) unsafe.Pointer { return unsafe.Pointer(uintptr(p) + x) } +// getg returns the pointer to the current g. +// The compiler rewrites calls to this function into instructions +// that fetch the g directly (from TLS or from the dedicated register). func getg() *g // mcall switches from the g to the g0 stack and invokes fn(g),