The slowest thing that can happen in funcdata is a cache miss
on moduledata.gofunc. Move that memory load earlier.
Also, for better ergonomics when working on this code,
do more calculations as uintptrs.
name old time/op new time/op delta
StackCopyWithStkobj-8 10.5ms ± 5% 9.9ms ± 4% -6.03% (p=0.000 n=15+15)
Change-Id: I590f4449725983c7f8d274c4ac7ed384d9018d85
Reviewed-on: https://go-review.googlesource.com/c/go/+/354134
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
if i < 0 || i >= f.nfuncdata {
return nil
}
- p := add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(f.npcdata)*4)
- p = add(p, uintptr(i)*4)
- off := *(*uint32)(p)
+ base := f.datap.gofunc // load gofunc address early so that we calculate during cache misses
+ p := uintptr(unsafe.Pointer(&f.nfuncdata)) + unsafe.Sizeof(f.nfuncdata) + uintptr(f.npcdata)*4 + uintptr(i)*4
+ off := *(*uint32)(unsafe.Pointer(p))
// Return off == ^uint32(0) ? 0 : f.datap.gofunc + uintptr(off), but without branches.
// The compiler calculates mask on most architectures using conditional assignment.
var mask uintptr
mask = 1
}
mask--
- raw := f.datap.gofunc + uintptr(off)
+ raw := base + uintptr(off)
return unsafe.Pointer(raw & mask)
}