From 4d0f3a1c95c11594e04f40fbfffb0dc10c1d097c Mon Sep 17 00:00:00 2001
From: Russ Cox
Date: Mon, 27 Apr 2015 22:45:57 -0400
Subject: [PATCH] cmd/internal/gc, runtime: use 1-bit bitmap for stack frames,
 data, bss
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, and multiword, and we used the leftover value to
distinguish uninitialized from scalar, even though the garbage collector
(GC) didn't care.

Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer
vs not.

The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in
one CL.

The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps",
since the 2-bit representation lives (at least for now) in some of the
reflect data.

Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.

The performance change is basically a wash, but the new encoding uses
less memory, produces smaller binaries, and enables further bitmap
reductions.

name                               old mean              new mean              delta
BenchmarkBinaryTree17               13.2s × (0.97,1.03)   13.0s × (0.99,1.01)  -0.93% (p=0.005)
BenchmarkBinaryTree17-2             9.69s × (0.96,1.05)   9.51s × (0.96,1.03)  -1.86% (p=0.001)
BenchmarkBinaryTree17-4             10.1s × (0.97,1.05)   10.0s × (0.96,1.05)  ~      (p=0.141)
BenchmarkFannkuch11                 4.35s × (0.99,1.01)   4.43s × (0.98,1.04)  +1.75% (p=0.001)
BenchmarkFannkuch11-2               4.31s × (0.99,1.03)   4.32s × (1.00,1.00)  ~      (p=0.095)
BenchmarkFannkuch11-4               4.32s × (0.99,1.02)   4.38s × (0.98,1.04)  +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty           83.5ns × (0.97,1.10)  87.3ns × (0.92,1.11)  +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2         81.8ns × (0.98,1.04)  82.5ns × (0.97,1.08)  ~      (p=0.364)
BenchmarkFmtFprintfEmpty-4         80.9ns × (0.99,1.01)  82.6ns × (0.97,1.08)  +2.12% (p=0.010)
BenchmarkFmtFprintfString           320ns × (0.95,1.04)   322ns × (0.97,1.05)  ~      (p=0.368)
BenchmarkFmtFprintfString-2         303ns × (0.97,1.04)   304ns × (0.97,1.04)  ~      (p=0.484)
BenchmarkFmtFprintfString-4         305ns × (0.97,1.05)   306ns × (0.98,1.05)  ~      (p=0.543)
BenchmarkFmtFprintfInt              311ns × (0.98,1.03)   319ns × (0.97,1.03)  +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2            297ns × (0.98,1.04)   301ns × (0.97,1.04)  +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4            302ns × (0.98,1.02)   304ns × (0.97,1.03)  ~      (p=0.126)
BenchmarkFmtFprintfIntInt           554ns × (0.96,1.05)   554ns × (0.97,1.03)  ~      (p=0.975)
BenchmarkFmtFprintfIntInt-2         520ns × (0.98,1.03)   517ns × (0.98,1.02)  ~      (p=0.153)
BenchmarkFmtFprintfIntInt-4         524ns × (0.98,1.02)   525ns × (0.98,1.03)  ~      (p=0.597)
BenchmarkFmtFprintfPrefixedInt      433ns × (0.97,1.06)   434ns × (0.97,1.06)  ~      (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2    413ns × (0.98,1.04)   413ns × (0.98,1.03)  ~      (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4    420ns × (0.97,1.03)   421ns × (0.97,1.03)  ~      (p=0.561)
BenchmarkFmtFprintfFloat            620ns × (0.99,1.03)   636ns × (0.97,1.03)  +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2          601ns × (0.98,1.02)   617ns × (0.98,1.03)  +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4          613ns × (0.98,1.03)   626ns × (0.98,1.02)  +2.15% (p=0.000)
BenchmarkFmtManyArgs               2.19µs × (0.96,1.04)  2.23µs × (0.97,1.02)  +1.65% (p=0.000)
BenchmarkFmtManyArgs-2             2.08µs × (0.98,1.03)  2.10µs × (0.99,1.02)  +0.79% (p=0.019)
BenchmarkFmtManyArgs-4             2.10µs × (0.98,1.02)  2.13µs × (0.98,1.02)  +1.72% (p=0.000)
BenchmarkGobDecode                 21.3ms × (0.97,1.05)  21.1ms × (0.97,1.04)  -1.36% (p=0.025)
BenchmarkGobDecode-2               20.0ms × (0.97,1.03)  19.2ms × (0.97,1.03)  -4.00% (p=0.000)
BenchmarkGobDecode-4               19.5ms × (0.99,1.02)  19.0ms × (0.99,1.01)  -2.39% (p=0.000)
BenchmarkGobEncode                 18.3ms × (0.95,1.07)  18.1ms × (0.96,1.08)  ~      (p=0.305)
BenchmarkGobEncode-2               16.8ms × (0.97,1.02)  16.4ms × (0.98,1.02)  -2.79% (p=0.000)
BenchmarkGobEncode-4               15.4ms × (0.98,1.02)  15.4ms × (0.98,1.02)  ~      (p=0.465)
BenchmarkGzip                       650ms × (0.98,1.03)   655ms × (0.97,1.04)  ~      (p=0.075)
BenchmarkGzip-2                     652ms × (0.98,1.03)   655ms × (0.98,1.02)  ~      (p=0.337)
BenchmarkGzip-4                     656ms × (0.98,1.04)   653ms × (0.98,1.03)  ~      (p=0.291)
BenchmarkGunzip                     143ms × (1.00,1.01)   143ms × (1.00,1.01)  ~      (p=0.507)
BenchmarkGunzip-2                   143ms × (1.00,1.01)   143ms × (1.00,1.01)  ~      (p=0.313)
BenchmarkGunzip-4                   143ms × (1.00,1.01)   143ms × (1.00,1.01)  ~      (p=0.312)
BenchmarkHTTPClientServer           110µs × (0.98,1.03)   109µs × (0.99,1.02)  -1.40% (p=0.000)
BenchmarkHTTPClientServer-2         154µs × (0.90,1.08)   149µs × (0.90,1.08)  -3.43% (p=0.007)
BenchmarkHTTPClientServer-4         138µs × (0.97,1.04)   138µs × (0.96,1.04)  ~      (p=0.670)
BenchmarkJSONEncode                40.2ms × (0.98,1.02)  40.2ms × (0.98,1.05)  ~      (p=0.828)
BenchmarkJSONEncode-2              35.1ms × (0.99,1.02)  35.2ms × (0.98,1.03)  ~      (p=0.392)
BenchmarkJSONEncode-4              35.3ms × (0.98,1.03)  35.3ms × (0.98,1.02)  ~      (p=0.813)
BenchmarkJSONDecode                 119ms × (0.97,1.02)   117ms × (0.98,1.02)  -1.80% (p=0.000)
BenchmarkJSONDecode-2               115ms × (0.99,1.02)   114ms × (0.98,1.02)  -1.18% (p=0.000)
BenchmarkJSONDecode-4               116ms × (0.98,1.02)   114ms × (0.98,1.02)  -1.43% (p=0.000)
BenchmarkMandelbrot200             6.03ms × (1.00,1.01)  6.03ms × (1.00,1.01)  ~      (p=0.985)
BenchmarkMandelbrot200-2           6.03ms × (1.00,1.01)  6.02ms × (1.00,1.01)  ~      (p=0.320)
BenchmarkMandelbrot200-4           6.03ms × (1.00,1.01)  6.03ms × (1.00,1.01)  ~      (p=0.799)
BenchmarkGoParse                   8.63ms × (0.89,1.10)  8.58ms × (0.93,1.09)  ~      (p=0.667)
BenchmarkGoParse-2                 8.20ms × (0.97,1.04)  8.37ms × (0.97,1.04)  +1.96% (p=0.001)
BenchmarkGoParse-4                 8.00ms × (0.98,1.02)  8.14ms × (0.99,1.02)  +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32        162ns × (1.00,1.01)   164ns × (0.98,1.04)  +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2      161ns × (1.00,1.01)   161ns × (1.00,1.00)  ~      (p=0.185)
BenchmarkRegexpMatchEasy0_32-4      161ns × (1.00,1.00)   161ns × (1.00,1.00)  -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K        540ns × (0.99,1.02)   566ns × (0.98,1.04)  +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2      540ns × (0.99,1.01)   557ns × (0.99,1.01)  +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4      541ns × (0.99,1.01)   559ns × (0.99,1.01)  +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32        139ns × (0.98,1.04)   139ns × (0.99,1.03)  ~      (p=0.979)
BenchmarkRegexpMatchEasy1_32-2      139ns × (0.99,1.04)   139ns × (0.99,1.02)  ~      (p=0.777)
BenchmarkRegexpMatchEasy1_32-4      139ns × (0.98,1.04)   139ns × (0.99,1.04)  ~      (p=0.771)
BenchmarkRegexpMatchEasy1_1K        890ns × (0.99,1.03)   885ns × (1.00,1.01)  -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2      888ns × (0.99,1.01)   885ns × (0.99,1.01)  -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4      890ns × (0.99,1.02)   884ns × (1.00,1.00)  -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32       252ns × (0.99,1.01)   251ns × (0.99,1.01)  ~      (p=0.081)
BenchmarkRegexpMatchMedium_32-2     254ns × (0.99,1.04)   252ns × (0.99,1.01)  -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4     253ns × (0.99,1.04)   252ns × (0.99,1.01)  -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K      72.9µs × (0.99,1.01)  72.7µs × (1.00,1.00)  ~      (p=0.064)
BenchmarkRegexpMatchMedium_1K-2    74.1µs × (0.98,1.05)  72.9µs × (1.00,1.01)  -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4    73.6µs × (0.99,1.05)  72.8µs × (1.00,1.00)  -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32        3.88µs × (0.99,1.03)  3.92µs × (0.98,1.05)  ~      (p=0.143)
BenchmarkRegexpMatchHard_32-2      3.89µs × (0.99,1.03)  3.93µs × (0.98,1.09)  ~      (p=0.278)
BenchmarkRegexpMatchHard_32-4      3.90µs × (0.99,1.05)  3.93µs × (0.98,1.05)  ~      (p=0.252)
BenchmarkRegexpMatchHard_1K         118µs × (0.99,1.01)   117µs × (0.99,1.02)  -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2       118µs × (0.99,1.01)   118µs × (0.99,1.03)  ~      (p=0.581)
BenchmarkRegexpMatchHard_1K-4       118µs × (0.99,1.02)   117µs × (0.99,1.01)  -0.54% (p=0.002)
BenchmarkRevcomp                    991ms × (0.95,1.10)   989ms × (0.94,1.08)  ~      (p=0.879)
BenchmarkRevcomp-2                  978ms × (0.95,1.11)   962ms × (0.96,1.08)  ~      (p=0.257)
BenchmarkRevcomp-4                  979ms × (0.96,1.07)   974ms × (0.96,1.11)  ~      (p=0.678)
BenchmarkTemplate                   141ms × (0.99,1.02)   145ms × (0.99,1.02)  +2.75% (p=0.000)
BenchmarkTemplate-2                 135ms × (0.98,1.02)   138ms × (0.99,1.02)  +2.34% (p=0.000)
BenchmarkTemplate-4                 136ms × (0.98,1.02)   140ms × (0.99,1.02)  +2.71% (p=0.000)
BenchmarkTimeParse                  640ns × (0.99,1.01)   622ns × (0.99,1.01)  -2.88% (p=0.000)
BenchmarkTimeParse-2                640ns × (0.99,1.01)   622ns × (1.00,1.00)  -2.81% (p=0.000)
BenchmarkTimeParse-4                640ns × (1.00,1.01)   622ns × (0.99,1.01)  -2.82% (p=0.000)
BenchmarkTimeFormat                 730ns × (0.98,1.02)   731ns × (0.98,1.03)  ~      (p=0.767)
BenchmarkTimeFormat-2               709ns × (0.99,1.02)   707ns × (0.99,1.02)  ~      (p=0.347)
BenchmarkTimeFormat-4               717ns × (0.98,1.01)   718ns × (0.98,1.02)  ~      (p=0.793)

Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson
---
 src/cmd/internal/gc/pgen.go    |   6 +-
 src/cmd/internal/gc/plive.go   |  84 ++++++++++-----------
 src/cmd/internal/gc/reflect.go |  12 ++-
 src/cmd/internal/gc/walk.go    |  17 ++---
 src/runtime/gcinfo_test.go     |  60 +++++++--------
 src/runtime/heapdump.go        |  36 ++++-----
 src/runtime/mbitmap.go         |  56 ++++++------
 src/runtime/mfinal.go          |  45 ++++++-----
 src/runtime/mgcmark.go         | 131 ++++++++++++++++---------------
 src/runtime/stack1.go          |  28 +++----
 10 files changed, 237 insertions(+), 238 deletions(-)

diff --git a/src/cmd/internal/gc/pgen.go b/src/cmd/internal/gc/pgen.go
index 1667a5c13e..f247a685ca 100644
--- a/src/cmd/internal/gc/pgen.go
+++ b/src/cmd/internal/gc/pgen.go
@@ -142,12 +142,12 @@ func emitptrargsmap() {
     var xoffset int64
     if Curfn.Type.Thistuple > 0 {
         xoffset = 0
-        twobitwalktype1(getthisx(Curfn.Type), &xoffset, bv)
+        onebitwalktype1(getthisx(Curfn.Type), &xoffset, bv)
     }

     if Curfn.Type.Intuple > 0 {
         xoffset = 0
-        twobitwalktype1(getinargx(Curfn.Type), &xoffset, bv)
+        onebitwalktype1(getinargx(Curfn.Type), &xoffset, bv)
     }

     for j := 0; int32(j) < bv.n; j += 32 {
@@ -155,7 +155,7 @@ func emitptrargsmap() {
     }
     if Curfn.Type.Outtuple > 0 {
         xoffset = 0
-        twobitwalktype1(getoutargx(Curfn.Type), &xoffset, bv)
+        onebitwalktype1(getoutargx(Curfn.Type), &xoffset, bv)
         for j := 0; int32(j) < bv.n; j += 32 {
             off = duint32(sym, off, bv.b[j/32])
         }
diff --git a/src/cmd/internal/gc/plive.go b/src/cmd/internal/gc/plive.go
index fe6905a062..040a77814e 100644
--- a/src/cmd/internal/gc/plive.go
+++ b/src/cmd/internal/gc/plive.go
@@ -886,11 +886,11 @@ func checkptxt(fn *Node, firstp *obj.Prog) {
 // NOTE: The bitmap for a specific type t should be cached in t after the first run
 // and then simply copied into bv at the correct offset on future calls with
-// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, twobitwalktype1
+// the same type t. On https://rsc.googlecode.com/hg/testdata/slow.go, onebitwalktype1
 // accounts for 40% of the 6g execution time.
-func twobitwalktype1(t *Type, xoffset *int64, bv Bvec) {
+func onebitwalktype1(t *Type, xoffset *int64, bv Bvec) {
     if t.Align > 0 && *xoffset&int64(t.Align-1) != 0 {
-        Fatal("twobitwalktype1: invalid initial alignment, %v", t)
+        Fatal("onebitwalktype1: invalid initial alignment, %v", t)
     }

     switch t.Etype {
@@ -910,10 +910,6 @@ func twobitwalktype1(t *Type, xoffset *int64, bv Bvec) {
         TFLOAT64,
         TCOMPLEX64,
         TCOMPLEX128:
-        for i := int64(0); i < t.Width; i++ {
-            bvset(bv, int32(((*xoffset+i)/int64(Widthptr))*obj.BitsPerPointer)) // 1 = live scalar (BitsScalar)
-        }
-
         *xoffset += t.Width

     case TPTR32,
@@ -923,46 +919,46 @@ func twobitwalktype1(t *Type, xoffset *int64, bv Bvec) {
         TPTR64,
         TUNSAFEPTR,
         TFUNC,
         TCHAN,
         TMAP:
         if *xoffset&int64(Widthptr-1) != 0 {
-            Fatal("twobitwalktype1: invalid alignment, %v", t)
+            Fatal("onebitwalktype1: invalid alignment, %v", t)
         }
-        bvset(bv, int32((*xoffset/int64(Widthptr))*obj.BitsPerPointer+1)) // 2 = live ptr (BitsPointer)
+        bvset(bv, int32(*xoffset/int64(Widthptr))) // pointer
         *xoffset += t.Width

-    // struct { byte *str; intgo len; }
     case TSTRING:
+        // struct { byte *str; intgo len; }
         if *xoffset&int64(Widthptr-1) != 0 {
-            Fatal("twobitwalktype1: invalid alignment, %v", t)
+            Fatal("onebitwalktype1: invalid alignment, %v", t)
         }
-        bvset(bv, int32((*xoffset/int64(Widthptr))*obj.BitsPerPointer+1)) // 2 = live ptr in first slot (BitsPointer)
+        bvset(bv, int32(*xoffset/int64(Widthptr))) // pointer in first slot
        *xoffset += t.Width

-    // struct { Itab *tab; union { void *ptr, uintptr val } data; }
-    // or, when isnilinter(t)==true:
-    // struct { Type *type; union { void *ptr, uintptr val } data; }
     case TINTER:
+        // struct { Itab *tab; void *data; }
+        // or, when isnilinter(t)==true:
+        // struct { Type *type; void *data; }
         if *xoffset&int64(Widthptr-1) != 0 {
-            Fatal("twobitwalktype1: invalid alignment, %v", t)
+            Fatal("onebitwalktype1: invalid alignment, %v", t)
         }
-        bvset(bv, int32((*xoffset/int64(Widthptr))*obj.BitsPerPointer+1)) // 2 = live ptr in first slot (BitsPointer)
-        bvset(bv, int32((*xoffset/int64(Widthptr))*obj.BitsPerPointer+3)) // 2 = live ptr in second slot (BitsPointer)
+        bvset(bv, int32(*xoffset/int64(Widthptr)))   // pointer in first slot
+        bvset(bv, int32(*xoffset/int64(Widthptr)+1)) // pointer in second slot
         *xoffset += t.Width

-    // The value of t->bound is -1 for slices types and >0 for
-    // for fixed array types. All other values are invalid.
     case TARRAY:
+        // The value of t->bound is -1 for slices types and >0 for
+        // for fixed array types. All other values are invalid.
        if t.Bound < -1 {
-            Fatal("twobitwalktype1: invalid bound, %v", t)
+            Fatal("onebitwalktype1: invalid bound, %v", t)
        }
        if Isslice(t) {
            // struct { byte *array; uintgo len; uintgo cap; }
            if *xoffset&int64(Widthptr-1) != 0 {
-                Fatal("twobitwalktype1: invalid TARRAY alignment, %v", t)
+                Fatal("onebitwalktype1: invalid TARRAY alignment, %v", t)
            }
-            bvset(bv, int32((*xoffset/int64(Widthptr))*obj.BitsPerPointer+1)) // 2 = live ptr in first slot (BitsPointer)
+            bvset(bv, int32(*xoffset/int64(Widthptr))) // pointer in first slot (BitsPointer)
            *xoffset += t.Width
        } else {
            for i := int64(0); i < t.Bound; i++ {
-                twobitwalktype1(t.Type, xoffset, bv)
+                onebitwalktype1(t.Type, xoffset, bv)
            }
        }

@@ -972,14 +968,14 @@ func twobitwalktype1(t *Type, xoffset *int64, bv Bvec) {
        for t1 := t.Type; t1 != nil; t1 = t1.Down {
            fieldoffset = t1.Width
            *xoffset += fieldoffset - o
-            twobitwalktype1(t1.Type, xoffset, bv)
+            onebitwalktype1(t1.Type, xoffset, bv)
            o = fieldoffset + t1.Type.Width
        }

        *xoffset += t.Width - o

    default:
-        Fatal("twobitwalktype1: unexpected type, %v", t)
+        Fatal("onebitwalktype1: unexpected type, %v", t)
    }
 }
@@ -996,7 +992,7 @@ func argswords() int32 {
 // Generates live pointer value maps for arguments and local variables. The
 // this argument and the in arguments are always assumed live. The vars
 // argument is an array of Node*s.
-func twobitlivepointermap(lv *Liveness, liveout Bvec, vars []*Node, args Bvec, locals Bvec) {
+func onebitlivepointermap(lv *Liveness, liveout Bvec, vars []*Node, args Bvec, locals Bvec) {
    var node *Node
    var xoffset int64
@@ -1009,11 +1005,11 @@ func twobitlivepointermap(lv *Liveness, liveout Bvec, vars []*Node, args Bvec, l
        switch node.Class {
        case PAUTO:
            xoffset = node.Xoffset + stkptrsize
-            twobitwalktype1(node.Type, &xoffset, locals)
+            onebitwalktype1(node.Type, &xoffset, locals)

        case PPARAM, PPARAMOUT:
            xoffset = node.Xoffset
-            twobitwalktype1(node.Type, &xoffset, args)
+            onebitwalktype1(node.Type, &xoffset, args)
        }
    }
@@ -1025,13 +1021,13 @@ func twobitlivepointermap(lv *Liveness, liveout Bvec, vars []*Node, args Bvec, l
    if thisargtype != nil {
        xoffset = 0
-        twobitwalktype1(thisargtype, &xoffset, args)
+        onebitwalktype1(thisargtype, &xoffset, args)
    }

    inargtype := getinargx(lv.fn.Type)
    if inargtype != nil {
        xoffset = 0
-        twobitwalktype1(inargtype, &xoffset, args)
+        onebitwalktype1(inargtype, &xoffset, args)
    }
 }
@@ -1202,15 +1198,15 @@ func livenesssolve(lv *Liveness) {
 func islive(n *Node, args Bvec, locals Bvec) bool {
    switch n.Class {
    case PPARAM, PPARAMOUT:
-        for i := 0; int64(i) < n.Type.Width/int64(Widthptr)*obj.BitsPerPointer; i++ {
-            if bvget(args, int32(n.Xoffset/int64(Widthptr)*obj.BitsPerPointer+int64(i))) != 0 {
+        for i := 0; int64(i) < n.Type.Width/int64(Widthptr); i++ {
+            if bvget(args, int32(n.Xoffset/int64(Widthptr)+int64(i))) != 0 {
                return true
            }
        }

    case PAUTO:
-        for i := 0; int64(i) < n.Type.Width/int64(Widthptr)*obj.BitsPerPointer; i++ {
-            if bvget(locals, int32((n.Xoffset+stkptrsize)/int64(Widthptr)*obj.BitsPerPointer+int64(i))) != 0 {
+        for i := 0; int64(i) < n.Type.Width/int64(Widthptr); i++ {
+            if bvget(locals, int32((n.Xoffset+stkptrsize)/int64(Widthptr)+int64(i))) != 0 {
                return true
            }
        }
@@ -1239,7 +1235,7 @@ func livenessepilogue(lv *Liveness) {
    avarinit := bvalloc(nvars)
    any := bvalloc(nvars)
    all := bvalloc(nvars)
-    ambig := bvalloc(localswords() * obj.BitsPerPointer)
+    ambig := bvalloc(localswords())
    nmsg := int32(0)
    startmsg := int32(0)
@@ -1294,7 +1290,7 @@ func livenessepilogue(lv *Liveness) {
                        // Record in 'ambiguous' bitmap.
                        xoffset = n.Xoffset + stkptrsize
-                        twobitwalktype1(n.Type, &xoffset, ambig)
+                        onebitwalktype1(n.Type, &xoffset, ambig)
                    }
                }
            }
@@ -1303,10 +1299,10 @@ func livenessepilogue(lv *Liveness) {
                // value we are tracking.

                // Live stuff first.
-                args = bvalloc(argswords() * obj.BitsPerPointer)
+                args = bvalloc(argswords())
                lv.argslivepointers = append(lv.argslivepointers, args)

-                locals = bvalloc(localswords() * obj.BitsPerPointer)
+                locals = bvalloc(localswords())
                lv.livepointers = append(lv.livepointers, locals)

                if debuglive >= 3 {
@@ -1319,7 +1315,7 @@ func livenessepilogue(lv *Liveness) {
                // because the any/all calculation requires walking forward
                // over the block (as this loop does), while the liveout
                // requires walking backward (as the next loop does).
-                twobitlivepointermap(lv, any, lv.vars, args, locals)
+                onebitlivepointermap(lv, any, lv.vars, args, locals)
            }

            if p == bb.last {
@@ -1394,7 +1390,7 @@ func livenessepilogue(lv *Liveness) {
                args = lv.argslivepointers[pos]
                locals = lv.livepointers[pos]
-                twobitlivepointermap(lv, liveout, lv.vars, args, locals)
+                onebitlivepointermap(lv, liveout, lv.vars, args, locals)

                // Ambiguously live variables are zeroed immediately after
                // function entry. Mark them live for all the non-entry bitmaps
@@ -1727,7 +1723,7 @@ func livenessprintdebug(lv *Liveness) {
 // length of the bitmaps. All bitmaps are assumed to be of equal length. The
 // words that are followed are the raw bitmap words. The arr argument is an
 // array of Node*s.
-func twobitwritesymbol(arr []Bvec, sym *Sym) {
+func onebitwritesymbol(arr []Bvec, sym *Sym) {
    var i int
    var j int
    var word uint32
@@ -1816,9 +1812,9 @@ func liveness(fn *Node, firstp *obj.Prog, argssym *Sym, livesym *Sym) {
    }

    // Emit the live pointer map data structures
-    twobitwritesymbol(lv.livepointers, livesym)
+    onebitwritesymbol(lv.livepointers, livesym)

-    twobitwritesymbol(lv.argslivepointers, argssym)
+    onebitwritesymbol(lv.argslivepointers, argssym)

    // Free everything.
    for l := fn.Func.Dcl; l != nil; l = l.Next {
diff --git a/src/cmd/internal/gc/reflect.go b/src/cmd/internal/gc/reflect.go
index 824ed0b427..804f888fd3 100644
--- a/src/cmd/internal/gc/reflect.go
+++ b/src/cmd/internal/gc/reflect.go
@@ -1404,7 +1404,7 @@ func gengcmask(t *Type, gcmask []byte) {
    xoffset := int64(0)
    vec := bvalloc(2 * int32(Widthptr) * 8)
-    twobitwalktype1(t, &xoffset, vec)
+    onebitwalktype1(t, &xoffset, vec)

    // Unfold the mask for the GC bitmap format:
    // 4 bits per word, 2 high bits encode pointer info.
@@ -1419,13 +1419,11 @@ func gengcmask(t *Type, gcmask []byte) {
    var bits uint8
    for j := int64(0); j <= (nptr % 2); j++ {
        for i = 0; i < nptr; i++ {
-            bits = uint8(bvget(vec, int32(i*obj.BitsPerPointer)) | bvget(vec, int32(i*obj.BitsPerPointer+1))<<1)
-
-            // Some fake types (e.g. Hmap) has missing fileds.
-            // twobitwalktype1 generates BitsDead for that holes,
-            // replace BitsDead with BitsScalar.
-            if bits == obj.BitsDead {
+            // convert 0=scalar / 1=pointer to GC bit encoding
+            if bvget(vec, int32(i)) == 0 {
                bits = obj.BitsScalar
+            } else {
+                bits = obj.BitsPointer
            }
            bits <<= 2
            if half {
diff --git a/src/cmd/internal/gc/walk.go b/src/cmd/internal/gc/walk.go
index 37e18edf12..c32a8137d6 100644
--- a/src/cmd/internal/gc/walk.go
+++ b/src/cmd/internal/gc/walk.go
@@ -2243,30 +2243,23 @@ func applywritebarrier(n *Node, init **NodeList) *Node {
    } else if t.Width <= int64(4*Widthptr) {
        x := int64(0)
        if applywritebarrier_bv.b == nil {
-            applywritebarrier_bv = bvalloc(obj.BitsPerPointer * 4)
+            applywritebarrier_bv = bvalloc(4)
        }
        bvresetall(applywritebarrier_bv)
-        twobitwalktype1(t, &x, applywritebarrier_bv)
-        const (
-            PtrBit = 1
-        )
-        // The bvgets are looking for BitsPointer in successive slots.
-        if obj.BitsPointer != 1<<PtrBit {

diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
-        switch bv.bytedata[i/8] >> (i % 8) & typeMask {
-        default:
-            throw("unexpected pointer bits")
-        case typeDead:
-            // typeDead has already been processed in makeheapobjbv.
-            // We should only see it in stack maps, in which case we should continue processing.
-        case typeScalar:
-            // ok
-        case typePointer:
            dumpint(fieldKindPtr)
-            dumpint(uint64(offset + i/typeBitsWidth*ptrSize))
+            dumpint(uint64(offset + i*ptrSize))
        }
    }
 }
@@ -278,7 +270,7 @@ func dumpframe(s *stkframe, arg unsafe.Pointer) bool {
    var bv bitvector
    if stkmap != nil && stkmap.n > 0 {
        bv = stackmapdata(stkmap, pcdata)
-        dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n/typeBitsWidth*ptrSize)))
+        dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n*ptrSize)))
    } else {
        bv.n = -1
    }
@@ -326,7 +318,7 @@ func dumpframe(s *stkframe, arg unsafe.Pointer) bool {
    } else if stkmap.n > 0 {
        // Locals bitmap information, scan just the pointers in
        // locals.
-        dumpbv(&bv, s.varp-uintptr(bv.n)/typeBitsWidth*ptrSize-s.sp)
+        dumpbv(&bv, s.varp-uintptr(bv.n)*ptrSize-s.sp)
    }
    dumpint(fieldKindEol)
@@ -651,7 +643,7 @@ func dumpmemprof() {
    }
 }

-var dumphdr = []byte("go1.4 heap dump\n")
+var dumphdr = []byte("go1.5 heap dump\n")

 func mdump() {
    // make sure we're done sweeping
@@ -720,18 +712,21 @@ func dumpbvtypes(bv *bitvector, base unsafe.Pointer) {
 func makeheapobjbv(p uintptr, size uintptr) bitvector {
    // Extend the temp buffer if necessary.
    nptr := size / ptrSize
-    if uintptr(len(tmpbuf)) < nptr*typeBitsWidth/8+1 {
+    if uintptr(len(tmpbuf)) < nptr/8+1 {
        if tmpbuf != nil {
            sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
        }
-        n := nptr*typeBitsWidth/8 + 1
+        n := nptr/8 + 1
        p := sysAlloc(n, &memstats.other_sys)
        if p == nil {
            throw("heapdump: out of memory")
        }
        tmpbuf = (*[1 << 30]byte)(p)[:n]
    }
-    // Convert heap bitmap to type bitmap.
+    // Convert heap bitmap to pointer bitmap.
+    for i := uintptr(0); i < nptr/8+1; i++ {
+        tmpbuf[i] = 0
+    }
    i := uintptr(0)
    hbits := heapBitsForAddr(p)
    for ; i < nptr; i++ {
@@ -740,8 +735,9 @@ func makeheapobjbv(p uintptr, size uintptr) bitvector {
            break // end of object
        }
        hbits = hbits.next()
-        tmpbuf[i*typeBitsWidth/8] &^= (typeMask << ((i * typeBitsWidth) % 8))
-        tmpbuf[i*typeBitsWidth/8] |= bits << ((i * typeBitsWidth) % 8)
+        if bits == typePointer {
+            tmpbuf[i/8] |= 1 << (i % 8)
+        }
    }
-    return bitvector{int32(i * typeBitsWidth), &tmpbuf[0]}
+    return bitvector{int32(i), &tmpbuf[0]}
 }
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index a1ebf03e69..f0c7520e38 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -4,9 +4,20 @@
 // Garbage collector: type and heap bitmaps.
 //
+// Stack, data, and bss bitmaps
+//
+// Not handled in this file, but worth mentioning: stack frames and global data
+// in the data and bss sections are described by 1-bit bitmaps in which 0 means
+// scalar or uninitialized or dead and 1 means pointer to visit during GC.
+//
+// Comparing this 1-bit form with the 2-bit form described below, 0 represents
+// both the 2-bit 00 and 01, while 1 represents the 2-bit 10.
+// Therefore conversions between the two (until the 2-bit form is gone)
+// can be done by x>>1 for 2-bit to 1-bit and x+1 for 1-bit to 2-bit.
+//
 // Type bitmaps
 //
-// The global variables (in the data and bss sections) and types that aren't too large
+// Types that aren't too large
 // record information about the layout of their memory words using a type bitmap.
 // The bitmap holds two bits for each pointer-sized word. The two-bit values are:
 //
@@ -17,7 +28,6 @@
 //
 // typeDead only appears in type bitmaps in Go type descriptors
 // and in type bitmaps embedded in the heap bitmap (see below).
-// It is not used in the type bitmap for the global variables.
 //
 // Heap bitmap
 //
@@ -71,9 +81,8 @@ const (
    typePointer            = 2
    typePointerCheckmarked = 3

-    typeBitsWidth = 2 // # of type bits per pointer-sized word
-    typeMask      = 1<<typeBitsWidth - 1

+                v >>= 1 // convert typePointer to 1, others to 0
+                mask[pos/8] |= v << (pos % 8)
+                pos++
            }
        }
        prog = addb(prog, round(uintptr(siz)*typeBitsWidth, 8)/8)
@@ -668,13 +677,13 @@ func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool)

 // Unrolls GC program prog for data/bss, returns dense GC mask.
 func unrollglobgcprog(prog *byte, size uintptr) bitvector {
-    masksize := round(round(size, ptrSize)/ptrSize*typeBitsWidth, 8) / 8
+    masksize := round(round(size, ptrSize)/ptrSize, 8) / 8
    mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
    mask[masksize] = 0xa1
    pos := uintptr(0)
    prog = unrollgcprog1(&mask[0], prog, &pos, false, false)
-    if pos != size/ptrSize*typeBitsWidth {
-        print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize*typeBitsWidth, "\n")
+    if pos != size/ptrSize {
+        print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize, "\n")
        throw("unrollglobgcprog: bad program size")
    }
    if *prog != insEnd {
@@ -744,8 +753,6 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
    *mask = nil
    *len = 0

-    const typeBitsPerByte = 8 / typeBitsWidth
-
    // data
    for datap := &firstmoduledata; datap != nil; datap = datap.next {
        if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
@@ -754,8 +761,9 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
            *mask = &make([]byte, *len)[0]
            for i := uintptr(0); i < n; i += ptrSize {
                off := (uintptr(p) + i - datap.data) / ptrSize
-                bits := (*(*byte)(add(unsafe.Pointer(datap.gcdatamask.bytedata), off/typeBitsPerByte)) >> ((off % typeBitsPerByte) * typeBitsWidth)) & typeMask
-                *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+                bits := (*addb(datap.gcdatamask.bytedata, off/8) >> (off % 8)) & 1
+                bits += 1 // convert 1-bit to 2-bit
+                *addb(*mask, i/ptrSize) = bits
            }
            return
        }
@@ -767,8 +775,9 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
            *mask = &make([]byte, *len)[0]
            for i := uintptr(0); i < n; i += ptrSize {
                off := (uintptr(p) + i - datap.bss) / ptrSize
-                bits := (*(*byte)(add(unsafe.Pointer(datap.gcbssmask.bytedata), off/typeBitsPerByte)) >> ((off % typeBitsPerByte) * typeBitsWidth)) & typeMask
-                *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+                bits := (*addb(datap.gcbssmask.bytedata, off/8) >> (off % 8)) & 1
+                bits += 1 // convert 1-bit to 2-bit
+                *addb(*mask, i/ptrSize) = bits
            }
            return
        }
@@ -782,7 +791,7 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
        *mask = &make([]byte, *len)[0]
        for i := uintptr(0); i < n; i += ptrSize {
            bits := heapBitsForAddr(base + i).typeBits()
-            *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+            *addb(*mask, i/ptrSize) = bits
        }
        return
    }
@@ -810,14 +819,15 @@ func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
            return
        }
        bv := stackmapdata(stkmap, pcdata)
-        size := uintptr(bv.n) / typeBitsWidth * ptrSize
+        size := uintptr(bv.n) * ptrSize
        n := (*ptrtype)(unsafe.Pointer(t)).elem.size
        *len = n / ptrSize
        *mask = &make([]byte, *len)[0]
        for i := uintptr(0); i < n; i += ptrSize {
            off := (uintptr(p) + i - frame.varp + size) / ptrSize
-            bits := ((*(*byte)(add(unsafe.Pointer(bv.bytedata), off*typeBitsWidth/8))) >> ((off * typeBitsWidth) % 8)) & typeMask
-            *(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+            bits := (*addb(bv.bytedata, off/8) >> (off % 8)) & 1
+            bits += 1 // convert 1-bit to 2-bit
+            *addb(*mask, i/ptrSize) = bits
        }
    }
 }
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index e3ff8ff9d4..7e1773c88c 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -20,7 +20,7 @@ var finlock mutex // protects the following variables
 var fing *g        // goroutine that runs finalizers
 var finq *finblock // list of finalizers that are to be executed
 var finc *finblock // cache of free blocks
-var finptrmask [_FinBlockSize / typeBitmapScale]byte
+var finptrmask [_FinBlockSize / ptrSize / 8]byte
 var fingwait bool
 var fingwake bool
 var allfin *finblock // list of all blocks
@@ -35,25 +35,31 @@ type finalizer struct {
 }

 var finalizer1 = [...]byte{
-    // Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
-    // Each byte describes 4 words.
-    // Need 4 Finalizers described by 5 bytes before pattern repeats:
-    //    ptr ptr uintptr ptr ptr
-    //    ptr ptr uintptr ptr ptr
-    //    ptr ptr uintptr ptr ptr
-    //    ptr ptr uintptr ptr ptr
+    // Each Finalizer is 5 words, ptr ptr INT ptr ptr (INT = uintptr here)
+    // Each byte describes 8 words.
+    // Need 8 Finalizers described by 5 bytes before pattern repeats:
+    //    ptr ptr INT ptr ptr
+    //    ptr ptr INT ptr ptr
+    //    ptr ptr INT ptr ptr
+    //    ptr ptr INT ptr ptr
+    //    ptr ptr INT ptr ptr
+    //    ptr ptr INT ptr ptr
+    //    ptr ptr INT ptr ptr
+    //    ptr ptr INT ptr ptr
    // aka
-    //    ptr ptr uintptr ptr
-    //    ptr ptr ptr uintptr
-    //    ptr ptr ptr ptr
-    //    uintptr ptr ptr ptr
-    //    ptr uintptr ptr ptr
+    //
+    //    ptr ptr INT ptr ptr ptr ptr INT
+    //    ptr ptr ptr ptr INT ptr ptr ptr
+    //    ptr INT ptr ptr ptr ptr INT ptr
+    //    ptr ptr ptr INT ptr ptr ptr ptr
+    //    INT ptr ptr ptr ptr INT ptr ptr
+    //
    // Assumptions about Finalizer layout checked below.
-    typePointer | typePointer<<2 | typeScalar<<4 | typePointer<<6,
-    typePointer | typePointer<<2 | typePointer<<4 | typeScalar<<6,
-    typePointer | typePointer<<2 | typePointer<<4 | typePointer<<6,
-    typeScalar | typePointer<<2 | typePointer<<4 | typePointer<<6,
-    typePointer | typeScalar<<2 | typePointer<<4 | typePointer<<6,
+    1<<0 | 1<<1 | 0<<2 | 1<<3 | 1<<4 | 1<<5 | 1<<6 | 0<<7,
+    1<<0 | 1<<1 | 1<<2 | 1<<3 | 0<<4 | 1<<5 | 1<<6 | 1<<7,
+    1<<0 | 0<<1 | 1<<2 | 1<<3 | 1<<4 | 1<<5 | 0<<6 | 1<<7,
+    1<<0 | 1<<1 | 1<<2 | 0<<3 | 1<<4 | 1<<5 | 1<<6 | 1<<7,
+    0<<0 | 1<<1 | 1<<2 | 1<<3 | 1<<4 | 0<<5 | 1<<6 | 1<<7,
 }

 func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot *ptrtype) {
@@ -72,8 +78,7 @@ func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot
        unsafe.Offsetof(finalizer{}.arg) != ptrSize ||
        unsafe.Offsetof(finalizer{}.nret) != 2*ptrSize ||
        unsafe.Offsetof(finalizer{}.fint) != 3*ptrSize ||
-        unsafe.Offsetof(finalizer{}.ot) != 4*ptrSize ||
-        typeBitsWidth != 2) {
+        unsafe.Offsetof(finalizer{}.ot) != 4*ptrSize) {
        throw("finalizer out of sync")
    }
    for i := range finptrmask {
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 1bb709c895..401507545f 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -51,7 +51,7 @@ func gcscan_m() {
 }

 // ptrmask for an allocation containing a single pointer.
-var oneptr = [...]uint8{typePointer}
+var oneptrmask = [...]uint8{1}

 //go:nowritebarrier
 func markroot(desc *parfor, i uint32) {
@@ -98,9 +98,9 @@ func markroot(desc *parfor, i uint32) {
            // A finalizer can be set for an inner byte of an object, find object beginning.
            p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
            if gcphase != _GCscan {
-                scanblock(p, s.elemsize, nil, &gcw) // scanned during mark phase
+                scanobject(p, &gcw) // scanned during mark termination
            }
            scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptrmask[0], &gcw)
        }
    }
@@ -383,7 +383,7 @@ func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWork) {
            throw("scanframe: bad symbol table")
        }
        bv := stackmapdata(stkmap, pcdata)
-        size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
+        size = uintptr(bv.n) * ptrSize
        scanblock(frame.varp-size, size, bv.bytedata, gcw)
    }
@@ -405,7 +405,7 @@ func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWork) {
        }
        bv = stackmapdata(stkmap, pcdata)
    }
    scanblock(frame.argp, uintptr(bv.n)*ptrSize, bv.bytedata, gcw)
 }
@@ -447,7 +447,7 @@ func gcDrain(gcw *gcWork, flushScanCredit int64) {
        // out of the wbuf passed in + a single object placed
        // into an empty wbuf in scanobject so there could be
        // a performance hit as we keep fetching fresh wbufs.
-        scanobject(b, 0, nil, gcw)
+        scanobject(b, gcw)

        // Flush background scan work credit to the global
        // account if we've accumulated enough locally so
@@ -499,7 +499,7 @@ func gcDrainUntilPreempt(gcw *gcWork, flushScanCredit int64) {
            // No more work
            break
        }
-        scanobject(b, 0, nil, gcw)
+        scanobject(b, gcw)

        // Flush background scan work credit to the global
        // account if we've accumulated enough locally so
@@ -534,12 +534,12 @@ func gcDrainN(gcw *gcWork, scanWork int64) {
        if b == 0 {
            return
        }
-        scanobject(b, 0, nil, gcw)
+        scanobject(b, gcw)
    }
 }

-// scanblock scans b as scanobject would.
-// If the gcphase is GCscan, scanblock performs additional checks.
+// scanblock scans b as scanobject would, but using an explicit
+// pointer bitmap instead of the heap bitmap.
 //go:nowritebarrier
 func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
    // Use local copies of original parameters, so that a stack trace
@@ -548,59 +548,69 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
    b := b0
    n := n0

-    // ptrmask can have 2 possible values:
-    // 1. nil - obtain pointer mask from GC bitmap.
-    // 2. pointer to a compact mask (for stacks and data).
+    arena_start := mheap_.arena_start
+    arena_used := mheap_.arena_used
+    scanWork := int64(0)

-    scanobject(b, n, ptrmask, gcw)
-    if gcphase == _GCscan {
-        if inheap(b) && ptrmask == nil {
-            // b is in heap, we are in GCscan so there should be a ptrmask.
-            throw("scanblock: In GCscan phase and inheap is true.")
+    for i := uintptr(0); i < n; {
+        // Find bits for the next word.
+        bits := uint32(*addb(ptrmask, i/(ptrSize*8)))
+        if bits == 0 {
+            i += ptrSize * 8
+            continue
+        }
+        for j := 0; j < 8 && i < n; j++ {
+            if bits&1 != 0 {
+                // Same work as in scanobject; see comments there.
+                obj := *(*uintptr)(unsafe.Pointer(b + i))
+                scanWork++
+                if obj != 0 && arena_start <= obj && obj < arena_used {
+                    if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
+                        checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
+                    }
+                    if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
+                        greyobject(obj, b, i, hbits, span, gcw)
+                    }
+                }
+            }
+            bits >>= 1
+            i += ptrSize
        }
    }
+
+    gcw.bytesMarked += uint64(n)
+    gcw.scanWork += scanWork
 }

-// scanobject scans memory starting at b, adding pointers to gcw.
-// If ptrmask != nil, it specifies the pointer mask starting at b and
-// n specifies the number of bytes to scan.
-// If ptrmask == nil, b must point to the beginning of a heap object
-// and scanobject consults the GC bitmap for the pointer mask and the
-// spans for the size of the object (it ignores n).
+// scanobject scans the object starting at b, adding pointers to gcw.
+// b must point to the beginning of a heap object; scanobject consults
+// the GC bitmap for the pointer mask and the spans for the size of the
+// object (it ignores n).
 //go:nowritebarrier
-func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWork) {
+func scanobject(b uintptr, gcw *gcWork) {
    arena_start := mheap_.arena_start
    arena_used := mheap_.arena_used
    scanWork := int64(0)

    // Find bits of the beginning of the object.
-    var hbits heapBits
-
-    if ptrmask == nil {
-        // b must point to the beginning of a heap object, so
-        // we can get its bits and span directly.
-        hbits = heapBitsForAddr(b)
-        s := spanOfUnchecked(b)
-        n = s.elemsize
-        if n == 0 {
-            throw("scanobject n == 0")
-        }
+    // b must point to the beginning of a heap object, so
+    // we can get its bits and span directly.
+    hbits := heapBitsForAddr(b)
+    s := spanOfUnchecked(b)
+    n := s.elemsize
+    if n == 0 {
+        throw("scanobject n == 0")
    }
+
    for i := uintptr(0); i < n; i += ptrSize {
        // Find bits for this word.
-        var bits uintptr
-        if ptrmask != nil {
-            // dense mask (stack or data)
-            bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
-        } else {
-            if i != 0 {
-                // Avoid needless hbits.next() on last iteration.
+        if i != 0 {
+            // Avoid needless hbits.next() on last iteration.
+            hbits = hbits.next()
+        }
+        bits := uintptr(hbits.typeBits())
+        if bits == typeDead {
+            break // no more pointers in this object
        }
        if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
@@ -608,10 +618,13 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWork) {
        }

        if bits&typePointer != typePointer {
-            print("gc useCheckmark=", useCheckmark, " b=", hex(b), " ptrmask=", ptrmask, "\n")
+            print("gc useCheckmark=", useCheckmark, " b=", hex(b), "\n")
            throw("unexpected garbage collection bits")
        }

+        // Work here is duplicated in scanblock.
+        // If you make changes here, make changes there too.
+
        obj := *(*uintptr)(unsafe.Pointer(b + i))

        // Track the scan work performed as a way to estimate
@@ -626,17 +639,15 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWork) {

        // At this point we have extracted the next potential pointer.
        // Check if it points into heap.
-        if obj == 0 || obj < arena_start || obj >= arena_used {
-            continue
-        }
-
-        if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
-            checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
-        }
+        if obj != 0 && arena_start <= obj && obj < arena_used {
+            if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
+                checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
+            }

-        // Mark the object.
-        if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
-            greyobject(obj, b, i, hbits, span, gcw)
+            // Mark the object.
+            if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
+                greyobject(obj, b, i, hbits, span, gcw)
+            }
        }
    }
    gcw.bytesMarked += uint64(n)
diff --git a/src/runtime/stack1.go b/src/runtime/stack1.go
index db7e3cbeca..4fa1a58ea8 100644
--- a/src/runtime/stack1.go
+++ b/src/runtime/stack1.go
@@ -298,10 +298,9 @@ func stackfree(stk stack) {

 var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real

-var mapnames = []string{
-    typeDead:    "---",
-    typeScalar:  "scalar",
-    typePointer: "ptr",
+var ptrnames = []string{
+    0: "scalar",
+    1: "ptr",
 }

 // Stack frame layout
@@ -365,8 +364,8 @@ func gobv(bv bitvector) gobitvector {
    }
 }

-func ptrbits(bv *gobitvector, i uintptr) uint8 {
-    return (bv.bytedata[i/4] >> ((i & 3) * 2)) & 3
+func ptrbit(bv *gobitvector, i uintptr) uint8 {
+    return (bv.bytedata[i/8] >> (i % 8)) & 1
 }

 // bv describes the memory starting at address scanp.
@@ -376,21 +375,12 @@ func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f
    minp := adjinfo.old.lo
    maxp := adjinfo.old.hi
    delta := adjinfo.delta
-    num := uintptr(bv.n) / typeBitsWidth
+    num := uintptr(bv.n)
    for i := uintptr(0); i < num; i++ {
        if stackDebug >= 4 {
-            print(" ", add(scanp, i*ptrSize), ":", mapnames[ptrbits(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*ptrSize))), " # ", i, " ", bv.bytedata[i/4], "\n")
+            print(" ", add(scanp, i*ptrSize), ":", ptrnames[ptrbit(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*ptrSize))), " # ", i, " ", bv.bytedata[i/4], "\n")
        }
-        switch ptrbits(&bv, i) {
-        default:
-            throw("unexpected pointer bits")
-        case typeDead:
-            if debug.gcdead != 0 {
-                *(*unsafe.Pointer)(add(scanp, i*ptrSize)) = unsafe.Pointer(uintptr(poisonStack))
-            }
-        case typeScalar:
-            // ok
-        case typePointer:
+        if ptrbit(&bv, i) == 1 {
            p := *(*unsafe.Pointer)(add(scanp, i*ptrSize))
            up := uintptr(p)
            if f != nil && 0 < up && up < _PageSize && debug.invalidptr != 0 || up == poisonStack {
@@ -461,7 +451,7 @@ func adjustframe(frame *stkframe, arg unsafe.Pointer) bool {
        throw("bad symbol table")
    }
    bv = stackmapdata(stackmap, pcdata)
-    size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
+    size = uintptr(bv.n) * ptrSize
    if stackDebug >= 3 {
        print("      locals ", pcdata, "/", stackmap.n, " ", size/ptrSize, " words ", bv.bytedata, "\n")
    }
--
2.48.1
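
Illustrative sketches. None of the Go programs below are part of the commit;
they are standalone illustrations of the encoding the patch describes, with
invented data, layouts, and names.

The commit switches to one bit per pointer-sized word: 1 = live pointer,
0 = scalar/uninitialized/dead. A minimal sketch of building such a mask the
way onebitwalktype1 does, setting bit offset/Widthptr for each pointer word
(the struct layout here is a hypothetical example):

    package main

    import "fmt"

    func main() {
        // Hypothetical layout: struct { p *int; n int; s string } on a
        // 64-bit machine is four words: pointer, scalar, pointer (string
        // data), scalar (string length).
        isPointer := []bool{true, false, true, false}

        // One bit per pointer-sized word, eight words per mask byte.
        mask := make([]byte, (len(isPointer)+7)/8)
        for i, ptr := range isPointer {
            if ptr {
                mask[i/8] |= 1 << uint(i%8)
            }
        }
        fmt.Printf("%08b\n", mask[0]) // 00000101: words 0 and 2 hold pointers
    }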
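The new mbitmap.go comment gives the conversion rules between the two forms:
x>>1 maps 2-bit to 1-bit, and x+1 maps 1-bit back to 2-bit (collapsing dead
into scalar), which is exactly the `bits += 1 // convert 1-bit to 2-bit` step
in getgcmask. A sketch checking those rules (the constants mirror typeDead=0,
typeScalar=1, typePointer=2 from the patch):

    package main

    import "fmt"

    func main() {
        const (
            typeDead    = 0 // 2-bit 00
            typeScalar  = 1 // 2-bit 01
            typePointer = 2 // 2-bit 10
        )
        for _, twoBit := range []uint8{typeDead, typeScalar, typePointer} {
            oneBit := twoBit >> 1 // 2-bit to 1-bit: only typePointer maps to 1
            back := oneBit + 1    // 1-bit to 2-bit: dead collapses into scalar
            fmt.Printf("2-bit %02b -> 1-bit %b -> 2-bit %02b\n", twoBit, oneBit, back)
        }
    }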
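The rewritten scanblock walks an explicit 1-bit mask and can skip eight words
at a time when a whole mask byte is zero. A self-contained sketch of that loop
shape (the word values and mask are fake; no runtime internals are used):

    package main

    import "fmt"

    func main() {
        const ptrSize = 8 // bytes per word on a 64-bit machine
        words := []uintptr{0x1000, 42, 0x2000, 7, 0, 0, 0, 0, 0, 0x3000}
        ptrmask := []byte{0x05, 0x02} // words 0, 2, and 9 are pointers

        for i := 0; i < len(words); {
            bits := ptrmask[i/8]
            if bits == 0 {
                i += 8 // a whole byte of scalars: skip eight words at once
                continue
            }
            for j := 0; j < 8 && i < len(words); j++ {
                if bits>>(uint(i)%8)&1 == 1 {
                    fmt.Printf("offset %d: candidate pointer %#x\n", i*ptrSize, words[i])
                }
                i++
            }
        }
    }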
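Finally, the finptrmask table in mfinal.go relies on the 5-word finalizer
pattern (ptr ptr INT ptr ptr) repeating every lcm(5,8) = 40 words once each
mask byte covers 8 words. A sketch that recomputes the five hand-written
finalizer1 bytes, with word 2 of each finalizer (nret, a uintptr) as the only
scalar:

    package main

    import "fmt"

    func main() {
        // 40 words = 8 finalizers of 5 words each = 5 mask bytes.
        var pattern [5]byte
        for word := 0; word < 40; word++ {
            if word%5 != 2 { // every word except nret is a pointer
                pattern[word/8] |= 1 << uint(word%8)
            }
        }
        fmt.Printf("%#v\n", pattern) // matches the finalizer1 table in the diff
    }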