]> Cypherpunks repositories - gostls13.git/commitdiff
exp/norm: performance improvements of quickSpan
authorMarcel van Lohuizen <mpvl@golang.org>
Mon, 5 Sep 2011 17:09:20 +0000 (19:09 +0200)
committerMarcel van Lohuizen <mpvl@golang.org>
Mon, 5 Sep 2011 17:09:20 +0000 (19:09 +0200)
- fixed performance bug that could lead to O(n^2) behavior
- performance improvement for ASCII case

R=r, r
CC=golang-dev
https://golang.org/cl/4956060

src/pkg/exp/norm/normalize.go
src/pkg/exp/norm/normalize_test.go

index 749d3aa30d1ae58f508f380acf86f239c36f1afc..0bbf2547b9812eac346b204c00594a66336e3225 100644 (file)
@@ -198,12 +198,16 @@ func (f Form) QuickSpan(b []byte) int {
 func quickSpan(fd *formInfo, b []byte) int {
        var lastCC uint8
        var lastSegStart int
-       i := 0
+       var i, nc int
        for i < len(b) {
                if b[i] < utf8.RuneSelf {
-                       lastSegStart = i
-                       i++
+                       // Keep the loop tight for ASCII processing, as this is where
+                       // most of the time is spent for this case.
+                       for i++; i < len(b) && b[i] < utf8.RuneSelf; i++ {
+                       }
+                       lastSegStart = i - 1
                        lastCC = 0
+                       nc = 0
                        continue
                }
                info := fd.info(b[i:])
@@ -212,9 +216,6 @@ func quickSpan(fd *formInfo, b []byte) int {
                        return len(b)
                }
                cc := info.ccc
-               if lastCC > cc && cc != 0 {
-                       return lastSegStart
-               }
                if fd.composing {
                        if !info.flags.isYesC() {
                                break
@@ -224,8 +225,20 @@ func quickSpan(fd *formInfo, b []byte) int {
                                break
                        }
                }
-               if !fd.composing && cc == 0 {
+               if cc == 0 {
                        lastSegStart = i
+                       nc = 0
+               } else {
+                       if nc >= maxCombiningChars {
+                               lastSegStart = i
+                               lastCC = cc
+                               nc = 1
+                       } else {
+                               if lastCC > cc {
+                                       return lastSegStart
+                               }
+                               nc++
+                       }
                }
                lastCC = cc
                i += int(info.size)
index 9159a90c4dadcb72ffa511cd7310bd94be8b1bec..6e8650d59d3b8695580eb0fc9a88b07bc7832873 100644 (file)
@@ -220,6 +220,11 @@ var quickSpanTests = []PositionTest{
        // incorrectly ordered combining characters
        {"\u0300\u0316", 0, ""},
        {"\u0300\u0316cd", 0, ""},
+       // have a maximum number of combining characters.
+       {strings.Repeat("\u035D", 30) + "\u035B", 62, ""},
+       {"a" + strings.Repeat("\u035D", 30) + "\u035B", 63, ""},
+       {"Ɵ" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
+       {"aa" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
 }
 
 var quickSpanNFDTests = []PositionTest{