Make the span6 function (used as LinkArch.Assemble) faster
by avoiding redundant re-assemble rounds when the
build target is not NaCl.
NaCl is excluded because it needs Prog.Isize to fix alignment.
For make.bash, around 50% of functions can
be encoded in a single round. With this change, those functions
will be assembled in 1 round instead of 2.
compilebench results:
name old time/op new time/op delta
Template 305ms ± 2% 299ms ± 2% -1.99% (p=0.001 n=10+10)
Unicode 139ms ± 3% 138ms ± 4% ~ (p=0.222 n=9+9)
GoTypes 1.05s ± 1% 1.04s ± 1% -1.34% (p=0.000 n=10+9)
Compiler 4.78s ± 1% 4.71s ± 1% -1.45% (p=0.000 n=9+9)
SSA 12.2s ± 1% 12.0s ± 1% -1.90% (p=0.000 n=9+10)
Flate 204ms ± 3% 202ms ± 3% ~ (p=0.052 n=10+10)
GoParser 248ms ± 1% 244ms ± 2% -1.79% (p=0.000 n=10+9)
Reflect 671ms ± 1% 664ms ± 1% -0.96% (p=0.001 n=9+9)
Tar 287ms ± 2% 285ms ± 3% ~ (p=0.393 n=10+10)
XML 362ms ± 1% 353ms ± 2% -2.60% (p=0.000 n=10+9)
StdCmd 29.2s ± 1% 29.0s ± 1% -0.63% (p=0.021 n=10+8)
[Geo mean] 888ms 875ms -1.40%
name old user-time/op new user-time/op delta
Template 393ms ± 5% 373ms ± 8% -5.12% (p=0.013 n=9+10)
Unicode 185ms ± 6% 184ms ± 5% ~ (p=0.825 n=10+10)
GoTypes 1.33s ± 1% 1.31s ± 3% -1.60% (p=0.004 n=10+10)
Compiler 5.98s ± 3% 5.92s ± 1% ~ (p=0.050 n=10+10)
SSA 15.5s ± 2% 15.3s ± 0% ~ (p=0.156 n=10+9)
Flate 255ms ± 5% 252ms ± 5% ~ (p=0.362 n=10+10)
GoParser 309ms ± 1% 304ms ± 3% -1.79% (p=0.021 n=7+10)
Reflect 839ms ± 2% 833ms ± 1% ~ (p=0.160 n=10+9)
Tar 363ms ± 3% 358ms ± 4% ~ (p=0.194 n=8+10)
XML 446ms ± 3% 442ms ± 3% ~ (p=0.503 n=10+10)
[Geo mean] 791ms 779ms -1.55%
Passes toolstash-check.
Change-Id: Ibcdb09f2c28907932581b7566f46d34be292594b
Reviewed-on: https://go-review.googlesource.com/108895
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
var c int32
errors := ctxt.Errors
for {
- loop := int32(0)
+ // This loop continues while there are reasons to re-assemble
+ // whole block, like the presence of long forward jumps.
+ reAssemble := false
for i := range s.R {
s.R[i] = obj.Reloc{}
}
v := int32(p.Pc - (q.Pc + int64(q.Isize)))
if q.Back&branchShort != 0 {
if v > 127 {
- loop++
+ reAssemble = true
q.Back ^= branchShort
}
m := ab.Len()
if int(p.Isize) != m {
p.Isize = uint8(m)
- loop++
+ // When building for NaCl, we currently need
+ // at least 2 rounds to ensure proper 32-byte alignment.
+ if ctxt.Headtype == objabi.Hnacl {
+ reAssemble = true
+ }
}
s.Grow(p.Pc + int64(m))
ctxt.Diag("span must be looping")
log.Fatalf("loop")
}
- if loop == 0 {
+ if !reAssemble {
break
}
if ctxt.Errors > errors {