]> Cypherpunks repositories - gostls13.git/commitdiff
unicode/utf16: speed up and clean up Encode and EncodeRune
authorAlberto Donizetti <alb.donizetti@gmail.com>
Tue, 23 Feb 2016 21:54:38 +0000 (22:54 +0100)
committerBrad Fitzpatrick <bradfitz@golang.org>
Wed, 24 Feb 2016 21:07:03 +0000 (21:07 +0000)
name                        old time/op  new time/op  delta
EncodeValidASCII-4          74.1ns ± 1%  70.1ns ± 1%   -5.46%  (p=0.000 n=10+10)
EncodeValidJapaneseChars-4  61.3ns ± 0%  58.9ns ± 0%   -3.82%  (p=0.000 n=10+10)
EncodeRune-4                13.1ns ± 1%   9.8ns ± 0%  -25.24%   (p=0.000 n=10+9)

Fixes #6957

Change-Id: I9dde6d77420c34c6e2ef3e6213bb6be9b58a3074
Reviewed-on: https://go-review.googlesource.com/19891
Reviewed-by: Rob Pike <r@golang.org>
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/unicode/utf16/utf16.go

index 276fce9e563461940eb6ee8dd28f3e7c88af6eaf..0a5a02ebe2690da213e2ec10de90162564cd424b 100644 (file)
@@ -45,7 +45,7 @@ func DecodeRune(r1, r2 rune) rune {
 // If the rune is not a valid Unicode code point or does not need encoding,
 // EncodeRune returns U+FFFD, U+FFFD.
 func EncodeRune(r rune) (r1, r2 rune) {
-       if r < surrSelf || r > maxRune || IsSurrogate(r) {
+       if r < surrSelf || r > maxRune {
                return replacementChar, replacementChar
        }
        r -= surrSelf
@@ -65,20 +65,22 @@ func Encode(s []rune) []uint16 {
        n = 0
        for _, v := range s {
                switch {
-               case v < 0, surr1 <= v && v < surr3, v > maxRune:
-                       v = replacementChar
-                       fallthrough
-               case v < surrSelf:
+               case 0 <= v && v < surr1, surr3 <= v && v < surrSelf:
+                       // normal rune
                        a[n] = uint16(v)
                        n++
-               default:
+               case surrSelf <= v && v <= maxRune:
+                       // needs surrogate sequence
                        r1, r2 := EncodeRune(v)
                        a[n] = uint16(r1)
                        a[n+1] = uint16(r2)
                        n += 2
+               default:
+                       a[n] = uint16(replacementChar)
+                       n++
                }
        }
-       return a[0:n]
+       return a[:n]
 }
 
 // Decode returns the Unicode code point sequence represented