Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/gc: inline runtime.getg
author Russ Cox <rsc@golang.org>
Fri, 3 Apr 2015 16:23:28 +0000 (12:23 -0400)
committer Russ Cox <rsc@golang.org>
Tue, 7 Apr 2015 14:26:47 +0000 (14:26 +0000)
This more closely restores what the old C runtime did.
(In C, g was an 'extern register' with the same effective
implementation as in this CL.)

On a late 2012 MacBookPro10,2, best of 5 old vs best of 5 new:

benchmark                          old ns/op      new ns/op      delta
BenchmarkBinaryTree17              4981312777     4463426605     -10.40%
BenchmarkFannkuch11                3046495712     3006819428     -1.30%
BenchmarkFmtFprintfEmpty           89.3           79.8           -10.64%
BenchmarkFmtFprintfString          284            262            -7.75%
BenchmarkFmtFprintfInt             282            262            -7.09%
BenchmarkFmtFprintfIntInt          480            448            -6.67%
BenchmarkFmtFprintfPrefixedInt     382            358            -6.28%
BenchmarkFmtFprintfFloat           529            486            -8.13%
BenchmarkFmtManyArgs               1849           1773           -4.11%
BenchmarkGobDecode                 12835963       11794385       -8.11%
BenchmarkGobEncode                 10527170       10288422       -2.27%
BenchmarkGzip                      436109569      438422516      +0.53%
BenchmarkGunzip                    110121663      109843648      -0.25%
BenchmarkHTTPClientServer          81930          85446          +4.29%
BenchmarkJSONEncode                24638574       24280603       -1.45%
BenchmarkJSONDecode                93022423       85753546       -7.81%
BenchmarkMandelbrot200             4703899        4735407        +0.67%
BenchmarkGoParse                   5319853        5086843        -4.38%
BenchmarkRegexpMatchEasy0_32       151            151            +0.00%
BenchmarkRegexpMatchEasy0_1K       452            453            +0.22%
BenchmarkRegexpMatchEasy1_32       131            132            +0.76%
BenchmarkRegexpMatchEasy1_1K       761            722            -5.12%
BenchmarkRegexpMatchMedium_32      228            224            -1.75%
BenchmarkRegexpMatchMedium_1K      63751          64296          +0.85%
BenchmarkRegexpMatchHard_32        3188           3238           +1.57%
BenchmarkRegexpMatchHard_1K        95396          96756          +1.43%
BenchmarkRevcomp                   661587262      687107364      +3.86%
BenchmarkTemplate                  108312598      104008540      -3.97%
BenchmarkTimeParse                 453            459            +1.32%
BenchmarkTimeFormat                475            441            -7.16%

The garbage benchmark from the benchmarks subrepo gets 2.6% faster as well.

Change-Id: I320aeda332db81012688b26ffab23f6581c59cfa
Reviewed-on: https://go-review.googlesource.com/8460
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Rick Hudson <rlh@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
24 files changed:
src/cmd/5g/galign.go
src/cmd/5g/ggen.go
src/cmd/6g/galign.go
src/cmd/6g/ggen.go
src/cmd/7g/galign.go
src/cmd/7g/ggen.go
src/cmd/8g/galign.go
src/cmd/8g/ggen.go
src/cmd/9g/galign.go
src/cmd/9g/ggen.go
src/cmd/internal/gc/cgen.go
src/cmd/internal/gc/fmt.go
src/cmd/internal/gc/gen.go
src/cmd/internal/gc/go.go
src/cmd/internal/gc/syntax.go
src/cmd/internal/gc/typecheck.go
src/cmd/internal/gc/walk.go
src/runtime/asm_386.s
src/runtime/asm_amd64.s
src/runtime/asm_amd64p32.s
src/runtime/asm_arm.s
src/runtime/asm_arm64.s
src/runtime/asm_ppc64x.s
src/runtime/stubs.go

index 1b349e105c8777bd07d115d9fb342ee7c93db9be..0a6c6552bfc45e607395e33ae9e996669de9e40f 100644 (file)
@@ -64,6 +64,7 @@ func main() {
        gc.Thearch.Defframe = defframe
        gc.Thearch.Excise = excise
        gc.Thearch.Expandchecks = expandchecks
+       gc.Thearch.Getg = getg
        gc.Thearch.Gins = gins
        gc.Thearch.Ginscon = ginscon
        gc.Thearch.Ginsnop = ginsnop
index 753c6e0092becc54a91261e0d5e27eb9b0447606..edad7af32d9cff91148222ca80a1cdc72af275b3 100644 (file)
@@ -494,3 +494,10 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
        }
        return false
 }
+
+// getg generates code for res = runtime.getg(): it moves register arm.REGG into res.
+func getg(res *gc.Node) {
+       var n1 gc.Node
+       gc.Nodreg(&n1, res.Type, arm.REGG) // n1 refers to REGG, with the type of res
+       gmove(&n1, res)
+}
index a73ddc6475dc88cc18db961fb3b7ec90a70f06d8..74be60e5ee49f8c77d11e050ce0abfb489a0f06d 100644 (file)
@@ -96,6 +96,7 @@ func main() {
        gc.Thearch.Dodiv = dodiv
        gc.Thearch.Excise = excise
        gc.Thearch.Expandchecks = expandchecks
+       gc.Thearch.Getg = getg
        gc.Thearch.Gins = gins
        gc.Thearch.Ginscon = ginscon
        gc.Thearch.Ginsnop = ginsnop
index e609d0e60f4d7beef6d948371ff751df7e933b0d..ceeec251c1a882eaa01c0d0bf794fe82203228d2 100644 (file)
@@ -671,3 +671,20 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
        }
        return false
 }
+
+// getg generates code for res = runtime.getg(): it loads g via x86.REG_TLS into a register, then moves it to res.
+func getg(res *gc.Node) {
+       var n1 gc.Node
+       gc.Regalloc(&n1, res.Type, res)
+       mov := optoas(gc.OAS, gc.Types[gc.Tptr])
+       p := gins(mov, nil, &n1) // first mov: source is patched below to the REG_TLS register
+       p.From.Type = obj.TYPE_REG
+       p.From.Reg = x86.REG_TLS
+       p = gins(mov, nil, &n1) // second mov: source is patched below to a memory operand indexed by REG_TLS
+       p.From = p.To
+       p.From.Type = obj.TYPE_MEM
+       p.From.Index = x86.REG_TLS
+       p.From.Scale = 1
+       gmove(&n1, res)
+       gc.Regfree(&n1)
+}
index 1c50c21aa68c8fe7b1624ffa7c19e5eaf8e2e34e..36b54aa93516573cbcbd60a43b440911044be4a4 100644 (file)
@@ -62,6 +62,7 @@ func main() {
        gc.Thearch.Dodiv = dodiv
        gc.Thearch.Excise = excise
        gc.Thearch.Expandchecks = expandchecks
+       gc.Thearch.Getg = getg
        gc.Thearch.Gins = gins
        gc.Thearch.Ginscon = ginscon
        gc.Thearch.Ginsnop = ginsnop
index 0fc58543158e27ce723d5f51747c0517bc97a3af..94685d7fce99d2fb4c0bb40dbc72c2f291e813bd 100644 (file)
@@ -532,3 +532,10 @@ func expandchecks(firstp *obj.Prog) {
                p2.To.Offset = 0
        }
 }
+
+// getg generates code for res = runtime.getg(): it moves register arm64.REGG into res.
+func getg(res *gc.Node) {
+       var n1 gc.Node
+       gc.Nodreg(&n1, res.Type, arm64.REGG) // n1 refers to REGG, with the type of res
+       gmove(&n1, res)
+}
index 1c03df59079df1a97d61132eb20504e2d79db207..2a8e0b7aa991d086a665f5ce98ecb3dfe1dbda9a 100644 (file)
@@ -77,6 +77,7 @@ func main() {
        gc.Thearch.Dodiv = cgen_div
        gc.Thearch.Excise = excise
        gc.Thearch.Expandchecks = expandchecks
+       gc.Thearch.Getg = getg
        gc.Thearch.Gins = gins
        gc.Thearch.Ginscon = ginscon
        gc.Thearch.Ginsnop = ginsnop
index 115c962a003432ab2e6ba46c78351472c2e34e8b..9a551b0cac33fa0494c9fe5771ea573816fbd897 100644 (file)
@@ -944,3 +944,20 @@ func addindex(index *gc.Node, width int64, addr *gc.Node) bool {
        }
        return false
 }
+
+// getg generates code for res = runtime.getg(): it loads g via x86.REG_TLS into a register, then moves it to res.
+func getg(res *gc.Node) {
+       var n1 gc.Node
+       gc.Regalloc(&n1, res.Type, res)
+       mov := optoas(gc.OAS, gc.Types[gc.Tptr])
+       p := gins(mov, nil, &n1) // first mov: source is patched below to the REG_TLS register
+       p.From.Type = obj.TYPE_REG
+       p.From.Reg = x86.REG_TLS
+       p = gins(mov, nil, &n1) // second mov: source is patched below to a memory operand indexed by REG_TLS
+       p.From = p.To
+       p.From.Type = obj.TYPE_MEM
+       p.From.Index = x86.REG_TLS
+       p.From.Scale = 1
+       gmove(&n1, res)
+       gc.Regfree(&n1)
+}
index b9e6c32ebb2a81299b6adfb12a221674db7849ab..68eab5852cc42dddc1a0cae16e6a9536a0140a11 100644 (file)
@@ -71,6 +71,7 @@ func main() {
        gc.Thearch.Dodiv = dodiv
        gc.Thearch.Excise = excise
        gc.Thearch.Expandchecks = expandchecks
+       gc.Thearch.Getg = getg
        gc.Thearch.Gins = gins
        gc.Thearch.Ginscon = ginscon
        gc.Thearch.Ginsnop = ginsnop
index a009186ecb507f877c7edbc79bb068fabe8eb792..9af36a158b3e1dfdfda69bb091bdcc4f7162163e 100644 (file)
@@ -549,3 +549,10 @@ func expandchecks(firstp *obj.Prog) {
                p2.To.Offset = 0
        }
 }
+
+// getg generates code for res = runtime.getg(): it moves register ppc64.REGG into res.
+func getg(res *gc.Node) {
+       var n1 gc.Node
+       gc.Nodreg(&n1, res.Type, ppc64.REGG) // n1 refers to REGG, with the type of res
+       gmove(&n1, res)
+}
index 3b628ac243299d75ac8e29097606ea69f0139c53..886a2d12f4d9d5bd109e31d1d77a445b6e4c2646 100644 (file)
@@ -418,6 +418,10 @@ func Cgen(n *Node, res *Node) {
                Regfree(&n1)
                return
 
+       case OGETG:
+               Thearch.Getg(res)
+               return
+
                // symmetric binary
        case OAND,
                OOR,
index 38e358a735fe0decf2c699dd639577aac643e98c..589f20cff4d11433c8e8dc98ee84dfa0b7ce346c 100644 (file)
@@ -976,6 +976,7 @@ var opprec = []int{
        OCONV:         8,
        OCOPY:         8,
        ODELETE:       8,
+       OGETG:         8,
        OLEN:          8,
        OLITERAL:      8,
        OMAKESLICE:    8,
@@ -1363,7 +1364,7 @@ func exprfmt(n *Node, prec int) string {
                }
                return fmt.Sprintf("%v(%v)", Oconv(int(n.Op), obj.FmtSharp), Hconv(n.List, obj.FmtComma))
 
-       case OCALL, OCALLFUNC, OCALLINTER, OCALLMETH:
+       case OCALL, OCALLFUNC, OCALLINTER, OCALLMETH, OGETG:
                var f string
                f += exprfmt(n.Left, nprec)
                if n.Isddd {
index e52ff658459048c86363f1674a43522a34b290f9..4b7344a4954f5113186f853cb40c91012a7a70f4 100644 (file)
@@ -1002,6 +1002,10 @@ func gen(n *Node) {
        case ORETURN, ORETJMP:
                cgen_ret(n)
 
+       // Function calls turned into compiler intrinsics.
+       // At top level, can just ignore the call and make sure to preserve side effects in the argument, if any.
+       case OGETG:
+               // nothing
        case OSQRT:
                cgen_discard(n.Left)
 
index 027ad28fb1d954b49619e63b22017fdf8ce9c57d..a6faaa5f91794b3ab48e68b50feb402aaeb49fdc 100644 (file)
@@ -798,6 +798,7 @@ type Arch struct {
        Dodiv        func(int, *Node, *Node, *Node)
        Excise       func(*Flow)
        Expandchecks func(*obj.Prog)
+       Getg         func(*Node)
        Gins         func(int, *Node, *Node) *obj.Prog
        Ginscon      func(int, int64, *Node)
        Ginsnop      func()
index 736c7afda63b620c312d49397d894ddd069bec27..11cdf298f7c89f767c9f186cede20c8c056bd0fb 100644 (file)
@@ -304,6 +304,7 @@ const (
        ORETJMP // return to other function
        OPS     // compare parity set (for x86 NaN check)
        OSQRT   // sqrt(float64), on systems that have hw support
+       OGETG   // runtime.getg() (read g pointer)
 
        OEND
 )
index 43991648f8e62916fa05d0facb96a411620454a0..08262b15d48a95983b34fced9833015e209d7748 100644 (file)
@@ -1366,6 +1366,17 @@ OpSwitch:
                                t = t.Type
                        }
                        n.Type = t
+
+                       if n.Op == OCALLFUNC && n.Left.Op == ONAME && (compiling_runtime != 0 || n.Left.Sym.Pkg == Runtimepkg) && n.Left.Sym.Name == "getg" {
+                               // Emit code for runtime.getg() directly instead of calling function.
+                               // Most such rewrites (for example the similar one for math.Sqrt) should be done in walk,
+                               // so that the ordering pass can make sure to preserve the semantics of the original code
+                               // (in particular, the exact time of the function call) by introducing temporaries.
+                               // In this case, we know getg() always returns the same result within a given function
+                               // and we want to avoid the temporaries, so we do the rewrite earlier than is typical.
+                               n.Op = OGETG
+                       }
+
                        break OpSwitch
                }
 
@@ -1376,6 +1387,7 @@ OpSwitch:
                }
 
                n.Type = getoutargx(l.Type)
+
                break OpSwitch
 
        case OCAP, OLEN, OREAL, OIMAG:
index bf911169626d741600552978525a177a16d10baf..1012aa05380abc4626963cbccfa2f48e8ddf6931 100644 (file)
@@ -179,7 +179,8 @@ func walkstmt(np **Node) {
                OPRINTN,
                OPANIC,
                OEMPTY,
-               ORECOVER:
+               ORECOVER,
+               OGETG:
                if n.Typecheck == 0 {
                        Fatal("missing typecheck: %v", Nconv(n, obj.FmtSign))
                }
@@ -424,7 +425,8 @@ func walkexpr(np **Node, init **NodeList) {
                ONONAME,
                OINDREG,
                OEMPTY,
-               OPARAM:
+               OPARAM,
+               OGETG:
                goto ret
 
        case ONOT,
index bee8b29694c748a9374e2c6a31bf05081713ce6f..f2222d03b0277cedaa4e62ebd4f3cd5f482587ec 100644 (file)
@@ -1639,12 +1639,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0
        // traceback from goexit1 must hit code range of goexit
        BYTE    $0x90   // NOP
 
-TEXT runtime·getg(SB),NOSPLIT,$0-4
-       get_tls(CX)
-       MOVL    g(CX), AX
-       MOVL    AX, ret+0(FP)
-       RET
-
 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
        MOVL    addr+0(FP), AX
        PREFETCHT0      (AX)
index 946e151110ae8c89835ed331e63fb55ad1039ea9..0e5389fbd7d2658db9f3f55f2ba70e67a54bf5f9 100644 (file)
@@ -1673,12 +1673,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0
        // traceback from goexit1 must hit code range of goexit
        BYTE    $0x90   // NOP
 
-TEXT runtime·getg(SB),NOSPLIT,$0-8
-       get_tls(CX)
-       MOVQ    g(CX), AX
-       MOVQ    AX, ret+0(FP)
-       RET
-
 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
        MOVQ    addr+0(FP), AX
        PREFETCHT0      (AX)
index e144c4071fc954c625f7ea5a385ca038e2617b65..23e2cb9662e2d8731178be0f5cf7b553be69c66d 100644 (file)
@@ -1096,12 +1096,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$0-0
        // traceback from goexit1 must hit code range of goexit
        BYTE    $0x90   // NOP
 
-TEXT runtime·getg(SB),NOSPLIT,$0-4
-       get_tls(CX)
-       MOVL    g(CX), AX
-       MOVL    AX, ret+0(FP)
-       RET
-
 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
        MOVL    addr+0(FP), AX
        PREFETCHT0      (AX)
index a2e1e4be765cbbd548360506a43c995657f0ee28..b7042ea26b15881d992587e302d5f554a2dd39a1 100644 (file)
@@ -984,10 +984,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-4-0
        // traceback from goexit1 must hit code range of goexit
        MOVW    R0, R0  // NOP
 
-TEXT runtime·getg(SB),NOSPLIT,$-4-4
-       MOVW    g, ret+0(FP)
-       RET
-
 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
        RET
 
index 3c09d53a11cb1175c7df891df558dc166b5120ef..0b21a1da2f42313934a5911545e0180357f96426 100644 (file)
@@ -903,10 +903,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0
        MOVD    R0, R0  // NOP
        BL      runtime·goexit1(SB)    // does not return
 
-TEXT runtime·getg(SB),NOSPLIT,$-8-8
-       MOVD    g, ret+0(FP)
-       RET
-
 // TODO(aram): use PRFM here.
 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
        RET
index ef6405003b8e6fbca0a5a64358ae8dcfd0d51deb..5b7ad41df01b4f7192b194f4a25f269a40032b7f 100644 (file)
@@ -1134,10 +1134,6 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0
        // traceback from goexit1 must hit code range of goexit
        MOVD    R0, R0  // NOP
 
-TEXT runtime·getg(SB),NOSPLIT,$-8-8
-       MOVD    g, ret+0(FP)
-       RETURN
-
 TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
        RETURN
 
index 99d8dd45e211c1150abc3d7bed8cf40092f25a4c..7b6fbb0349da2b6aa0406a7fa669247940b1a048 100644 (file)
@@ -18,6 +18,9 @@ func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
        return unsafe.Pointer(uintptr(p) + x)
 }
 
+// getg returns the pointer to the current g.
+// It is declared without a body: the compiler rewrites calls to this function into
+// instructions that fetch the g directly (from TLS or from the dedicated register).
 func getg() *g
 
 // mcall switches from the g to the g0 stack and invokes fn(g),