Cypherpunks repositories - gostls13.git/commitdiff
unicode/utf8: make DecodeRune{,InString} inlineable
author Julien Cretel <jub0bsinthecloud@gmail.com>
Tue, 2 Sep 2025 22:10:40 +0000 (22:10 +0000)
committer thepudds <thepudds1460@gmail.com>
Wed, 3 Sep 2025 14:04:47 +0000 (07:04 -0700)
This change makes the fast path for ASCII characters inlineable in
DecodeRune and DecodeRuneInString and removes most instances of manual
inlining at call sites.

Here are some benchmark results (no change to allocations):

goos: darwin
goarch: amd64
pkg: unicode/utf8
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                             │     old      │                 new                  │
                             │    sec/op    │    sec/op     vs base                │
DecodeASCIIRune-8              2.4545n ± 2%   0.6253n ± 2%  -74.52% (p=0.000 n=20)
DecodeJapaneseRune-8            3.988n ± 1%    4.023n ± 1%   +0.86% (p=0.050 n=20)
DecodeASCIIRuneInString-8      2.4675n ± 1%   0.6264n ± 2%  -74.61% (p=0.000 n=20)
DecodeJapaneseRuneInString-8    3.992n ± 1%    4.001n ± 1%        ~ (p=0.625 n=20)
geomean                         3.134n         1.585n       -49.43%

Note: when #61502 gets resolved, DecodeRune and DecodeRuneInString should
be reverted to their idiomatic implementations.

Fixes #31666
Updates #48195

Change-Id: I4be25c4f52417dc28b3a7bd72f1b04018470f39d
GitHub-Last-Rev: 2e352a0045027e059be79cdb60241b5cf35fec71
GitHub-Pull-Request: golang/go#75181
Reviewed-on: https://go-review.googlesource.com/c/go/+/699675
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
14 files changed:
src/bufio/bufio.go
src/bytes/bytes.go
src/bytes/iter.go
src/cmd/compile/internal/test/inl_test.go
src/encoding/json/decode.go
src/fmt/format.go
src/fmt/print.go
src/regexp/regexp.go
src/strconv/quote.go
src/strings/iter.go
src/strings/reader.go
src/strings/strings.go
src/unicode/utf8/utf8.go
src/unicode/utf8/utf8_test.go

index 5244ce2e0ca9436b0fa8feead7c809a0f444da62..141a9a1a2a2305f070ec7ecf38ca16152ee2aeb9 100644 (file)
@@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) {
        if b.r == b.w {
                return 0, 0, b.readErr()
        }
-       r, size = rune(b.buf[b.r]), 1
-       if r >= utf8.RuneSelf {
-               r, size = utf8.DecodeRune(b.buf[b.r:b.w])
-       }
+       r, size = utf8.DecodeRune(b.buf[b.r:b.w])
        b.r += size
        b.lastByte = int(b.buf[b.r-1])
        b.lastRuneSize = size
index ce2e004910223440bfc7f60bfe2e8bca86000e80..9a7f4ee3c93afbd363804bd668779b766875d072 100644 (file)
@@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
        // more efficient, possibly due to cache effects.
        start := -1 // valid span start if >= 0
        for i := 0; i < len(s); {
-               size := 1
-               r := rune(s[i])
-               if r >= utf8.RuneSelf {
-                       r, size = utf8.DecodeRune(s[i:])
-               }
+               r, size := utf8.DecodeRune(s[i:])
                if f(r) {
                        if start >= 0 {
                                spans = append(spans, span{start, i})
@@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
        // fine. It could also shrink but that falls out naturally.
        b := make([]byte, 0, len(s))
        for i := 0; i < len(s); {
-               wid := 1
-               r := rune(s[i])
-               if r >= utf8.RuneSelf {
-                       r, wid = utf8.DecodeRune(s[i:])
-               }
+               r, wid := utf8.DecodeRune(s[i:])
                r = mapping(r)
                if r >= 0 {
                        b = utf8.AppendRune(b, r)
@@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int {
 func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
        start := 0
        for start < len(s) {
-               wid := 1
-               r := rune(s[start])
-               if r >= utf8.RuneSelf {
-                       r, wid = utf8.DecodeRune(s[start:])
-               }
+               r, wid := utf8.DecodeRune(s[start:])
                if f(r) == truth {
                        return start
                }
@@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte {
 
 func trimLeftUnicode(s []byte, cutset string) []byte {
        for len(s) > 0 {
-               r, n := rune(s[0]), 1
-               if r >= utf8.RuneSelf {
-                       r, n = utf8.DecodeRune(s)
-               }
+               r, n := utf8.DecodeRune(s)
                if !containsRune(cutset, r) {
                        break
                }
@@ -1251,19 +1236,10 @@ hasUnicode:
        t = t[i:]
        for len(s) != 0 && len(t) != 0 {
                // Extract first rune from each.
-               var sr, tr rune
-               if s[0] < utf8.RuneSelf {
-                       sr, s = rune(s[0]), s[1:]
-               } else {
-                       r, size := utf8.DecodeRune(s)
-                       sr, s = r, s[size:]
-               }
-               if t[0] < utf8.RuneSelf {
-                       tr, t = rune(t[0]), t[1:]
-               } else {
-                       r, size := utf8.DecodeRune(t)
-                       tr, t = r, t[size:]
-               }
+               sr, size := utf8.DecodeRune(s)
+               s = s[size:]
+               tr, size := utf8.DecodeRune(t)
+               t = t[size:]
 
                // If they match, keep going; if not, return false.
 
index b2abb2c9ba3dc6477fb3c10c37ae9c54da44712c..a4ece881d20fa1ed10490d1e43dbb9179899c013 100644 (file)
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
        return func(yield func([]byte) bool) {
                start := -1
                for i := 0; i < len(s); {
-                       size := 1
-                       r := rune(s[i])
-                       if r >= utf8.RuneSelf {
-                               r, size = utf8.DecodeRune(s[i:])
-                       }
+                       r, size := utf8.DecodeRune(s[i:])
                        if f(r) {
                                if start >= 0 {
                                        if !yield(s[start:i:i]) {
index eda6084b48e7cc22bb08652b36e02110959089ad..a49cd767db43d85b7cd363d04e22d35337cbac40 100644 (file)
@@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) {
                        "assemble64",
                },
                "unicode/utf8": {
+                       "DecodeRune",
+                       "DecodeRuneInString",
                        "FullRune",
                        "FullRuneInString",
                        "RuneLen",
index 70885a517e1876428258e0347170f5b2357c783a..fc29296c0f464f4eb1927b3fa9a7d49b808eb445 100644 (file)
@@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) {
                if c == '\\' || c == '"' || c < ' ' {
                        break
                }
-               if c < utf8.RuneSelf {
-                       r++
-                       continue
-               }
                rr, size := utf8.DecodeRune(s[r:])
                if rr == utf8.RuneError && size == 1 {
                        break
index 90e18cd696375f3288833a32de5f2983de46093a..334a94e2983e634a0d010742e93dc39d30fa6b9d 100644 (file)
@@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte {
                        if n < 0 {
                                return b[:i]
                        }
-                       wid := 1
-                       if b[i] >= utf8.RuneSelf {
-                               _, wid = utf8.DecodeRune(b[i:])
-                       }
+                       _, wid := utf8.DecodeRune(b[i:])
                        i += wid
                }
        }
index 155218046f47ce9d5f5cd36da2aaa4753b4f0324..01cfa1a1c7d7b440eff7927d34241a7de0bf1b7b 100644 (file)
@@ -1145,10 +1145,7 @@ formatLoop:
                        break
                }
 
-               verb, size := rune(format[i]), 1
-               if verb >= utf8.RuneSelf {
-                       verb, size = utf8.DecodeRuneInString(format[i:])
-               }
+               verb, size := utf8.DecodeRuneInString(format[i:])
                i += size
 
                switch {
index 253415fb6a44c6edef7c2af5d43054441332231f..66c73693995a423d7dc8d105bc0a3b0c6de752ed 100644 (file)
@@ -384,10 +384,6 @@ type inputString struct {
 
 func (i *inputString) step(pos int) (rune, int) {
        if pos < len(i.str) {
-               c := i.str[pos]
-               if c < utf8.RuneSelf {
-                       return rune(c), 1
-               }
                return utf8.DecodeRuneInString(i.str[pos:])
        }
        return endOfText, 0
@@ -409,17 +405,11 @@ func (i *inputString) context(pos int) lazyFlag {
        r1, r2 := endOfText, endOfText
        // 0 < pos && pos <= len(i.str)
        if uint(pos-1) < uint(len(i.str)) {
-               r1 = rune(i.str[pos-1])
-               if r1 >= utf8.RuneSelf {
-                       r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
-               }
+               r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
        }
        // 0 <= pos && pos < len(i.str)
        if uint(pos) < uint(len(i.str)) {
-               r2 = rune(i.str[pos])
-               if r2 >= utf8.RuneSelf {
-                       r2, _ = utf8.DecodeRuneInString(i.str[pos:])
-               }
+               r2, _ = utf8.DecodeRuneInString(i.str[pos:])
        }
        return newLazyFlag(r1, r2)
 }
@@ -431,10 +421,6 @@ type inputBytes struct {
 
 func (i *inputBytes) step(pos int) (rune, int) {
        if pos < len(i.str) {
-               c := i.str[pos]
-               if c < utf8.RuneSelf {
-                       return rune(c), 1
-               }
                return utf8.DecodeRune(i.str[pos:])
        }
        return endOfText, 0
@@ -456,17 +442,11 @@ func (i *inputBytes) context(pos int) lazyFlag {
        r1, r2 := endOfText, endOfText
        // 0 < pos && pos <= len(i.str)
        if uint(pos-1) < uint(len(i.str)) {
-               r1 = rune(i.str[pos-1])
-               if r1 >= utf8.RuneSelf {
-                       r1, _ = utf8.DecodeLastRune(i.str[:pos])
-               }
+               r1, _ = utf8.DecodeLastRune(i.str[:pos])
        }
        // 0 <= pos && pos < len(i.str)
        if uint(pos) < uint(len(i.str)) {
-               r2 = rune(i.str[pos])
-               if r2 >= utf8.RuneSelf {
-                       r2, _ = utf8.DecodeRune(i.str[pos:])
-               }
+               r2, _ = utf8.DecodeRune(i.str[pos:])
        }
        return newLazyFlag(r1, r2)
 }
index 99c292a8ed58848441a400031260fa44960caa28..da2325647d381747716c80522a7e3059ea45da6f 100644 (file)
@@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b
                buf = nBuf
        }
        buf = append(buf, quote)
-       for width := 0; len(s) > 0; s = s[width:] {
-               r := rune(s[0])
-               width = 1
-               if r >= utf8.RuneSelf {
-                       r, width = utf8.DecodeRuneInString(s)
-               }
+       for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
+               r, width = utf8.DecodeRuneInString(s)
                if width == 1 && r == utf8.RuneError {
                        buf = append(buf, `\x`...)
                        buf = append(buf, lowerhex[s[0]>>4])
index 69fe031739628ce89c47e5ea011083fca1c350da..84e763a8343df41fa8d50977453099df507f56f5 100644 (file)
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] {
        return func(yield func(string) bool) {
                start := -1
                for i := 0; i < len(s); {
-                       size := 1
-                       r := rune(s[i])
-                       if r >= utf8.RuneSelf {
-                               r, size = utf8.DecodeRuneInString(s[i:])
-                       }
+                       r, size := utf8.DecodeRuneInString(s[i:])
                        if f(r) {
                                if start >= 0 {
                                        if !yield(s[start:i]) {
index 497ffb7a39c635f6bb97a4e22c4989d71a994de1..f12c9b18b36d43bbae252e260d9893ffd9fe01aa 100644 (file)
@@ -90,10 +90,6 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) {
                return 0, 0, io.EOF
        }
        r.prevRune = int(r.i)
-       if c := r.s[r.i]; c < utf8.RuneSelf {
-               r.i++
-               return rune(c), 1, nil
-       }
        ch, size = utf8.DecodeRuneInString(r.s[r.i:])
        r.i += int64(size)
        return
index 74007977d911f0382e559f7b68bb119819364066..3cc3e79f982248f0d096986edfe1734bdb7989bc 100644 (file)
@@ -896,7 +896,7 @@ func TrimLeftFunc(s string, f func(rune) bool) string {
 // Unicode code points c satisfying f(c) removed.
 func TrimRightFunc(s string, f func(rune) bool) string {
        i := lastIndexFunc(s, f, false)
-       if i >= 0 && s[i] >= utf8.RuneSelf {
+       if i >= 0 {
                _, wid := utf8.DecodeRuneInString(s[i:])
                i += wid
        } else {
@@ -1028,10 +1028,7 @@ func trimLeftASCII(s string, as *asciiSet) string {
 
 func trimLeftUnicode(s, cutset string) string {
        for len(s) > 0 {
-               r, n := rune(s[0]), 1
-               if r >= utf8.RuneSelf {
-                       r, n = utf8.DecodeRuneInString(s)
-               }
+               r, n := utf8.DecodeRuneInString(s)
                if !ContainsRune(cutset, r) {
                        break
                }
@@ -1224,13 +1221,8 @@ hasUnicode:
                }
 
                // Extract first rune from second string.
-               var tr rune
-               if t[0] < utf8.RuneSelf {
-                       tr, t = rune(t[0]), t[1:]
-               } else {
-                       r, size := utf8.DecodeRuneInString(t)
-                       tr, t = r, t[size:]
-               }
+               tr, size := utf8.DecodeRuneInString(t)
+               t = t[size:]
 
                // If they match, keep going; if not, return false.
 
index 01cad1cc81f880cdc15d8e66147b5dafbedd201d..68283341d92ace8a383e4aaaffa70676f59ecd4a 100644 (file)
@@ -155,6 +155,20 @@ func FullRuneInString(s string) bool {
 // out of range, or is not the shortest possible UTF-8 encoding for the
 // value. No other validation is performed.
 func DecodeRune(p []byte) (r rune, size int) {
+       // Inlineable fast path for ASCII characters; see #48195.
+       // This implementation is weird but effective at rendering the
+       // function inlineable.
+       for _, b := range p {
+               if b < RuneSelf {
+                       return rune(b), 1
+               }
+               break
+       }
+       r, size = decodeRuneSlow(p)
+       return
+}
+
+func decodeRuneSlow(p []byte) (r rune, size int) {
        n := len(p)
        if n < 1 {
                return RuneError, 0
@@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) {
 // out of range, or is not the shortest possible UTF-8 encoding for the
 // value. No other validation is performed.
 func DecodeRuneInString(s string) (r rune, size int) {
+       // Inlineable fast path for ASCII characters; see #48195.
+       // This implementation is a bit weird but effective at rendering the
+       // function inlineable.
+       if s != "" && s[0] < RuneSelf {
+               return rune(s[0]), 1
+       } else {
+               r, size = decodeRuneInStringSlow(s)
+       }
+       return
+}
+
+func decodeRuneInStringSlow(s string) (rune, int) {
        n := len(s)
        if n < 1 {
                return RuneError, 0
index aece0fab731f414aa13251ab84b70b52d37886ff..bf4f074ffd0f5f685f7ea4923d89e23f66e68878 100644 (file)
@@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) {
 
 func BenchmarkDecodeASCIIRune(b *testing.B) {
        a := []byte{'a'}
-       for i := 0; i < b.N; i++ {
-               DecodeRune(a)
+       for range b.N {
+               runeSink, sizeSink = DecodeRune(a)
        }
 }
 
 func BenchmarkDecodeJapaneseRune(b *testing.B) {
        nihon := []byte("本")
-       for i := 0; i < b.N; i++ {
-               DecodeRune(nihon)
+       for range b.N {
+               runeSink, sizeSink = DecodeRune(nihon)
+       }
+}
+
+func BenchmarkDecodeASCIIRuneInString(b *testing.B) {
+       a := "a"
+       for range b.N {
+               runeSink, sizeSink = DecodeRuneInString(a)
        }
 }
 
+func BenchmarkDecodeJapaneseRuneInString(b *testing.B) {
+       nihon := "本"
+       for range b.N {
+               runeSink, sizeSink = DecodeRuneInString(nihon)
+       }
+}
+
+var (
+       runeSink rune
+       sizeSink int
+)
+
 // boolSink is used to reference the return value of benchmarked
 // functions to avoid dead code elimination.
 var boolSink bool