This change makes the fast path for ASCII characters inlineable in
DecodeRune and DecodeRuneInString and removes most instances of manual
inlining at call sites.
Here are some benchmark results (no change to allocations):
goos: darwin
goarch: amd64
pkg: unicode/utf8
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                             │     old      │                 new                 │
                             │    sec/op    │    sec/op     vs base               │
DecodeASCIIRune-8              2.4545n ± 2%   0.6253n ± 2%  -74.52% (p=0.000 n=20)
DecodeJapaneseRune-8            3.988n ± 1%    4.023n ± 1%   +0.86% (p=0.050 n=20)
DecodeASCIIRuneInString-8      2.4675n ± 1%   0.6264n ± 2%  -74.61% (p=0.000 n=20)
DecodeJapaneseRuneInString-8    3.992n ± 1%    4.001n ± 1%        ~ (p=0.625 n=20)
geomean                         3.134n         1.585n        -49.43%
Note: when #61502 gets resolved, DecodeRune and DecodeRuneInString should
be reverted to their idiomatic implementations.
Fixes #31666
Updates #48195
Change-Id: I4be25c4f52417dc28b3a7bd72f1b04018470f39d
GitHub-Last-Rev: 2e352a0045027e059be79cdb60241b5cf35fec71
GitHub-Pull-Request: golang/go#75181
Reviewed-on: https://go-review.googlesource.com/c/go/+/699675
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
if b.r == b.w {
return 0, 0, b.readErr()
}
- r, size = rune(b.buf[b.r]), 1
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(b.buf[b.r:b.w])
- }
+ r, size = utf8.DecodeRune(b.buf[b.r:b.w])
b.r += size
b.lastByte = int(b.buf[b.r-1])
b.lastRuneSize = size
// more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(s[i:])
- }
+ r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
spans = append(spans, span{start, i})
// fine. It could also shrink but that falls out naturally.
b := make([]byte, 0, len(s))
for i := 0; i < len(s); {
- wid := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRune(s[i:])
- }
+ r, wid := utf8.DecodeRune(s[i:])
r = mapping(r)
if r >= 0 {
b = utf8.AppendRune(b, r)
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
start := 0
for start < len(s) {
- wid := 1
- r := rune(s[start])
- if r >= utf8.RuneSelf {
- r, wid = utf8.DecodeRune(s[start:])
- }
+ r, wid := utf8.DecodeRune(s[start:])
if f(r) == truth {
return start
}
func trimLeftUnicode(s []byte, cutset string) []byte {
for len(s) > 0 {
- r, n := rune(s[0]), 1
- if r >= utf8.RuneSelf {
- r, n = utf8.DecodeRune(s)
- }
+ r, n := utf8.DecodeRune(s)
if !containsRune(cutset, r) {
break
}
t = t[i:]
for len(s) != 0 && len(t) != 0 {
// Extract first rune from each.
- var sr, tr rune
- if s[0] < utf8.RuneSelf {
- sr, s = rune(s[0]), s[1:]
- } else {
- r, size := utf8.DecodeRune(s)
- sr, s = r, s[size:]
- }
- if t[0] < utf8.RuneSelf {
- tr, t = rune(t[0]), t[1:]
- } else {
- r, size := utf8.DecodeRune(t)
- tr, t = r, t[size:]
- }
+ sr, size := utf8.DecodeRune(s)
+ s = s[size:]
+ tr, size := utf8.DecodeRune(t)
+ t = t[size:]
// If they match, keep going; if not, return false.
return func(yield func([]byte) bool) {
start := -1
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRune(s[i:])
- }
+ r, size := utf8.DecodeRune(s[i:])
if f(r) {
if start >= 0 {
if !yield(s[start:i:i]) {
"assemble64",
},
"unicode/utf8": {
+ "DecodeRune",
+ "DecodeRuneInString",
"FullRune",
"FullRuneInString",
"RuneLen",
if c == '\\' || c == '"' || c < ' ' {
break
}
- if c < utf8.RuneSelf {
- r++
- continue
- }
rr, size := utf8.DecodeRune(s[r:])
if rr == utf8.RuneError && size == 1 {
break
if n < 0 {
return b[:i]
}
- wid := 1
- if b[i] >= utf8.RuneSelf {
- _, wid = utf8.DecodeRune(b[i:])
- }
+ _, wid := utf8.DecodeRune(b[i:])
i += wid
}
}
break
}
- verb, size := rune(format[i]), 1
- if verb >= utf8.RuneSelf {
- verb, size = utf8.DecodeRuneInString(format[i:])
- }
+ verb, size := utf8.DecodeRuneInString(format[i:])
i += size
switch {
func (i *inputString) step(pos int) (rune, int) {
if pos < len(i.str) {
- c := i.str[pos]
- if c < utf8.RuneSelf {
- return rune(c), 1
- }
return utf8.DecodeRuneInString(i.str[pos:])
}
return endOfText, 0
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
- r1 = rune(i.str[pos-1])
- if r1 >= utf8.RuneSelf {
- r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
- }
+ r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
}
// 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) {
- r2 = rune(i.str[pos])
- if r2 >= utf8.RuneSelf {
- r2, _ = utf8.DecodeRuneInString(i.str[pos:])
- }
+ r2, _ = utf8.DecodeRuneInString(i.str[pos:])
}
return newLazyFlag(r1, r2)
}
func (i *inputBytes) step(pos int) (rune, int) {
if pos < len(i.str) {
- c := i.str[pos]
- if c < utf8.RuneSelf {
- return rune(c), 1
- }
return utf8.DecodeRune(i.str[pos:])
}
return endOfText, 0
r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) {
- r1 = rune(i.str[pos-1])
- if r1 >= utf8.RuneSelf {
- r1, _ = utf8.DecodeLastRune(i.str[:pos])
- }
+ r1, _ = utf8.DecodeLastRune(i.str[:pos])
}
// 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) {
- r2 = rune(i.str[pos])
- if r2 >= utf8.RuneSelf {
- r2, _ = utf8.DecodeRune(i.str[pos:])
- }
+ r2, _ = utf8.DecodeRune(i.str[pos:])
}
return newLazyFlag(r1, r2)
}
buf = nBuf
}
buf = append(buf, quote)
- for width := 0; len(s) > 0; s = s[width:] {
- r := rune(s[0])
- width = 1
- if r >= utf8.RuneSelf {
- r, width = utf8.DecodeRuneInString(s)
- }
+ for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
+ r, width = utf8.DecodeRuneInString(s)
if width == 1 && r == utf8.RuneError {
buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4])
return func(yield func(string) bool) {
start := -1
for i := 0; i < len(s); {
- size := 1
- r := rune(s[i])
- if r >= utf8.RuneSelf {
- r, size = utf8.DecodeRuneInString(s[i:])
- }
+ r, size := utf8.DecodeRuneInString(s[i:])
if f(r) {
if start >= 0 {
if !yield(s[start:i]) {
return 0, 0, io.EOF
}
r.prevRune = int(r.i)
- if c := r.s[r.i]; c < utf8.RuneSelf {
- r.i++
- return rune(c), 1, nil
- }
ch, size = utf8.DecodeRuneInString(r.s[r.i:])
r.i += int64(size)
return
// Unicode code points c satisfying f(c) removed.
func TrimRightFunc(s string, f func(rune) bool) string {
i := lastIndexFunc(s, f, false)
- if i >= 0 && s[i] >= utf8.RuneSelf {
+ if i >= 0 {
_, wid := utf8.DecodeRuneInString(s[i:])
i += wid
} else {
func trimLeftUnicode(s, cutset string) string {
for len(s) > 0 {
- r, n := rune(s[0]), 1
- if r >= utf8.RuneSelf {
- r, n = utf8.DecodeRuneInString(s)
- }
+ r, n := utf8.DecodeRuneInString(s)
if !ContainsRune(cutset, r) {
break
}
}
// Extract first rune from second string.
- var tr rune
- if t[0] < utf8.RuneSelf {
- tr, t = rune(t[0]), t[1:]
- } else {
- r, size := utf8.DecodeRuneInString(t)
- tr, t = r, t[size:]
- }
+ tr, size := utf8.DecodeRuneInString(t)
+ t = t[size:]
// If they match, keep going; if not, return false.
// out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed.
func DecodeRune(p []byte) (r rune, size int) {
+ // Inlineable fast path for ASCII characters; see #48195.
+ // This implementation is weird but effective at rendering the
+ // function inlineable.
+ for _, b := range p {
+ if b < RuneSelf {
+ return rune(b), 1
+ }
+ break
+ }
+ r, size = decodeRuneSlow(p)
+ return
+}
+
+func decodeRuneSlow(p []byte) (r rune, size int) {
n := len(p)
if n < 1 {
return RuneError, 0
// out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed.
func DecodeRuneInString(s string) (r rune, size int) {
+ // Inlineable fast path for ASCII characters; see #48195.
+ // This implementation is a bit weird but effective at rendering the
+ // function inlineable.
+ if s != "" && s[0] < RuneSelf {
+ return rune(s[0]), 1
+ } else {
+ r, size = decodeRuneInStringSlow(s)
+ }
+ return
+}
+
+func decodeRuneInStringSlow(s string) (rune, int) {
n := len(s)
if n < 1 {
return RuneError, 0
func BenchmarkDecodeASCIIRune(b *testing.B) {
a := []byte{'a'}
- for i := 0; i < b.N; i++ {
- DecodeRune(a)
+ for range b.N {
+ runeSink, sizeSink = DecodeRune(a)
}
}
func BenchmarkDecodeJapaneseRune(b *testing.B) {
nihon := []byte("本")
- for i := 0; i < b.N; i++ {
- DecodeRune(nihon)
+ for range b.N {
+ runeSink, sizeSink = DecodeRune(nihon)
+ }
+}
+
+func BenchmarkDecodeASCIIRuneInString(b *testing.B) {
+ a := "a"
+ for range b.N {
+ runeSink, sizeSink = DecodeRuneInString(a)
}
}
+func BenchmarkDecodeJapaneseRuneInString(b *testing.B) {
+ nihon := "本"
+ for range b.N {
+ runeSink, sizeSink = DecodeRuneInString(nihon)
+ }
+}
+
+var (
+ runeSink rune
+ sizeSink int
+)
+
// boolSink is used to reference the return value of benchmarked
// functions to avoid dead code elimination.
var boolSink bool