Turns out the optimal value is 8 on cortex-A9 systems (pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkAppend 907 908 +0.11%
BenchmarkAppend1Byte 101 101 +0.00%
BenchmarkAppend4Bytes 116 116 +0.00%
BenchmarkAppend8Bytes 139 138 -0.72%
BenchmarkAppend16Bytes 185 158 -14.59%
BenchmarkAppend32Bytes 131 131 +0.00%
BenchmarkAppendStr1Byte 72 72 +0.00%
BenchmarkAppendStr4Bytes 93 93 -0.21%
BenchmarkAppendStr8Bytes 116 116 +0.00%
BenchmarkAppendStr16Bytes 161 125 -22.36%
BenchmarkAppendStr32Bytes 102 102 +0.00%
BenchmarkAppendSpecialCase 613 613 +0.00%
R=golang-dev, r
CC=golang-dev
https://golang.org/cl/
8863045
thechar = '5',
BigEndian = 0,
CacheLineSize = 32,
- appendCrossover = 16
+ appendCrossover = 8
};