From 8e2bb7bb4ab8c15204a995e976ce7d023d0e37bf Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 28 Jan 2015 15:25:05 -0500 Subject: [PATCH] runtime: use threads slice in parfor instead of unsafe pointer math parfor originally used a tail array for its thread array. This got replaced with a slice allocation in the conversion to Go, but many of its gnarlier effects remained. Instead of keeping track of the pointer to the first element of the slice and using unsafe pointer math to get at the ith element, just keep the slice around and use regular slice indexing. There is no longer any need for padding to 64-bit align the tail array (there hasn't been since the Go conversion), so remove this unnecessary padding from the parfor struct. Finally, since the slice tracks its own length, replace the nthrmax field with len(thr). Change-Id: I0020a1815849bca53e3613a8fa46ae4fbae67576 Reviewed-on: https://go-review.googlesource.com/3394 Reviewed-by: Russ Cox --- src/runtime/export_test.go | 17 ++++++++-------- src/runtime/parfor.go | 41 +++++++++++++++----------------------- 2 files changed, 24 insertions(+), 34 deletions(-) diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 016938ed4e..3b13b7bb38 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -36,14 +36,13 @@ func LFStackPop(head *uint64) *LFNode { } type ParFor struct { - body *byte - done uint32 - Nthr uint32 - nthrmax uint32 - thrseq uint32 - Cnt uint32 - Ctx *byte - wait bool + body *byte + done uint32 + Nthr uint32 + thrseq uint32 + Cnt uint32 + Ctx *byte + wait bool } func NewParFor(nthrmax uint32) *ParFor { @@ -69,7 +68,7 @@ func ParForDo(desc *ParFor) { func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) { desc1 := (*parfor)(unsafe.Pointer(desc)) - pos := desc_thr_index(desc1, tid).pos + pos := desc1.thr[tid].pos return uint32(pos), uint32(pos >> 32) } diff --git a/src/runtime/parfor.go b/src/runtime/parfor.go index 1c28924096..fc5ebd887e 100644 --- a/src/runtime/parfor.go +++ b/src/runtime/parfor.go @@ -10,17 +10,17 @@ import "unsafe" // A parfor holds state for the parallel for operation. type parfor struct { - body unsafe.Pointer // go func(*parfor, uint32), executed for each element - done uint32 // number of idle threads - nthr uint32 // total number of threads - nthrmax uint32 // maximum number of threads - thrseq uint32 // thread id sequencer - cnt uint32 // iteration space [0, cnt) - ctx unsafe.Pointer // arbitrary user context - wait bool // if true, wait while all threads finish processing, + body unsafe.Pointer // go func(*parfor, uint32), executed for each element + done uint32 // number of idle threads + nthr uint32 // total number of threads + thrseq uint32 // thread id sequencer + cnt uint32 // iteration space [0, cnt) + ctx unsafe.Pointer // arbitrary user context + wait bool // if true, wait while all threads finish processing, // otherwise parfor may return while other threads are still working - thr *parforthread // array of thread descriptors - pad uint32 // to align parforthread.pos for 64-bit atomic operations + + thr []parforthread // thread descriptors + // stats nsteal uint64 nstealcnt uint64 @@ -42,14 +42,9 @@ type parforthread struct { pad [_CacheLineSize]byte } -func desc_thr_index(desc *parfor, i uint32) *parforthread { - return (*parforthread)(add(unsafe.Pointer(desc.thr), uintptr(i)*unsafe.Sizeof(*desc.thr))) -} - func parforalloc(nthrmax uint32) *parfor { return &parfor{ - thr: &make([]parforthread, nthrmax)[0], - nthrmax: nthrmax, + thr: make([]parforthread, nthrmax), } } @@ -66,7 +61,7 @@ func parforalloc(nthrmax uint32) *parfor { // The opaque user context ctx is recorded as desc.ctx and can be used by body. // TODO(austin): Remove ctx in favor of using a closure for body. func parforsetup(desc *parfor, nthr, n uint32, ctx unsafe.Pointer, wait bool, body func(*parfor, uint32)) { - if desc == nil || nthr == 0 || nthr > desc.nthrmax || body == nil { + if desc == nil || nthr == 0 || nthr > uint32(len(desc.thr)) || body == nil { print("desc=", desc, " nthr=", nthr, " count=", n, " body=", body, "\n") throw("parfor: invalid args") } @@ -84,14 +79,10 @@ func parforsetup(desc *parfor, nthr, n uint32, ctx unsafe.Pointer, wait bool, bo desc.nosyield = 0 desc.nsleep = 0 - for i := uint32(0); i < nthr; i++ { + for i := range desc.thr { begin := uint32(uint64(n) * uint64(i) / uint64(nthr)) end := uint32(uint64(n) * uint64(i+1) / uint64(nthr)) - pos := &desc_thr_index(desc, i).pos - if uintptr(unsafe.Pointer(pos))&7 != 0 { - throw("parforsetup: pos is not aligned") - } - *pos = uint64(begin) | uint64(end)<<32 + desc.thr[i].pos = uint64(begin) | uint64(end)<<32 } } @@ -112,7 +103,7 @@ func parfordo(desc *parfor) { return } - me := desc_thr_index(desc, tid) + me := &desc.thr[tid] mypos := &me.pos for { for { @@ -157,7 +148,7 @@ func parfordo(desc *parfor) { if victim >= tid { victim++ } - victimpos := &desc_thr_index(desc, victim).pos + victimpos := &desc.thr[victim].pos for { // See if it has any work. pos := atomicload64(victimpos) -- 2.48.1