]> Cypherpunks repositories - gostls13.git/commitdiff
unicode: improve SimpleFold performance for ascii
authorEgon Elbre <egonelbre@gmail.com>
Tue, 17 Nov 2015 14:51:23 +0000 (16:51 +0200)
committerBrad Fitzpatrick <bradfitz@golang.org>
Tue, 26 Apr 2016 21:59:50 +0000 (21:59 +0000)
This change significantly speeds up case-insensitive regexp matching.

benchmark                      old ns/op      new ns/op      delta
BenchmarkMatchEasy0i_32-8      2690           1473           -45.24%
BenchmarkMatchEasy0i_1K-8      80404          42269          -47.43%
BenchmarkMatchEasy0i_32K-8     3272187        2076118        -36.55%
BenchmarkMatchEasy0i_1M-8      104805990      66503805       -36.55%
BenchmarkMatchEasy0i_32M-8     3360192200     2126121600     -36.73%

benchmark                      old MB/s     new MB/s     speedup
BenchmarkMatchEasy0i_32-8      11.90        21.72        1.83x
BenchmarkMatchEasy0i_1K-8      12.74        24.23        1.90x
BenchmarkMatchEasy0i_32K-8     10.01        15.78        1.58x
BenchmarkMatchEasy0i_1M-8      10.00        15.77        1.58x
BenchmarkMatchEasy0i_32M-8     9.99         15.78        1.58x

Issue #13288

Change-Id: I94af7bb29e75d60b4f6ee760124867ab271b9642
Reviewed-on: https://go-review.googlesource.com/16943
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/regexp/exec_test.go
src/unicode/letter.go
src/unicode/maketables.go
src/unicode/tables.go

index cfc1e147c1c48a122f084509bc7f31e8331d19cd..f8f5f4020e05aa8fff714b0f1e476880479cd7d7 100644 (file)
@@ -672,6 +672,7 @@ func benchmark(b *testing.B, re string, n int) {
 
 const (
        easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+       easy0i = "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"
        easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
        medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
        hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
@@ -682,6 +683,11 @@ func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
 func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
 func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
 func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
+func BenchmarkMatchEasy0i_32(b *testing.B)  { benchmark(b, easy0i, 32<<0) }
+func BenchmarkMatchEasy0i_1K(b *testing.B)  { benchmark(b, easy0i, 1<<10) }
+func BenchmarkMatchEasy0i_32K(b *testing.B) { benchmark(b, easy0i, 32<<10) }
+func BenchmarkMatchEasy0i_1M(b *testing.B)  { benchmark(b, easy0i, 1<<20) }
+func BenchmarkMatchEasy0i_32M(b *testing.B) { benchmark(b, easy0i, 32<<20) }
 func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
 func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
 func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
index ffa083eb57888e475847ba8777fb0f316d1add61..8aec920d22f83af835c724f1d322fd394bf7f9ed 100644 (file)
@@ -332,6 +332,10 @@ type foldPair struct {
 //     SimpleFold('1') = '1'
 //
 func SimpleFold(r rune) rune {
+       if int(r) < len(asciiFold) {
+               return rune(asciiFold[r])
+       }
+
        // Consult caseOrbit table for special cases.
        lo := 0
        hi := len(caseOrbit)
index 328c75ed6396645a966e243abae146392106d32d..f364515c9045f38ea1500e2b677f117492a0ee32 100644 (file)
@@ -1172,6 +1172,7 @@ func printCasefold() {
                }
        }
 
+       printAsciiFold()
        printCaseOrbit()
 
        // Tables of category and script folding exceptions: code points
@@ -1269,6 +1270,25 @@ var comment = map[string]string{
                "// If there is no entry for a script name, there are no such points.\n",
 }
 
+func printAsciiFold() {
+       printf("var asciiFold = [MaxASCII + 1]uint16{\n")
+       for i := rune(0); i <= unicode.MaxASCII; i++ {
+               c := chars[i]
+               f := c.caseOrbit
+               if f == 0 {
+                       if c.lowerCase != i && c.lowerCase != 0 {
+                               f = c.lowerCase
+                       } else if c.upperCase != i && c.upperCase != 0 {
+                               f = c.upperCase
+                       } else {
+                               f = i
+                       }
+               }
+               printf("\t0x%04X,\n", f)
+       }
+       printf("}\n\n")
+}
+
 func printCaseOrbit() {
        if *test {
                for j := range chars {
index 8bb42062f9ed8f6007c43286f20de16563b988da..c04d69a6ff14bd15d39ad41cc2d1b585fb0a2bd1 100644 (file)
@@ -6834,6 +6834,137 @@ var properties = [MaxLatin1 + 1]uint8{
        0xFF: pLl | pp, // 'ΓΏ'
 }
 
+var asciiFold = [MaxASCII + 1]uint16{
+       0x0000,
+       0x0001,
+       0x0002,
+       0x0003,
+       0x0004,
+       0x0005,
+       0x0006,
+       0x0007,
+       0x0008,
+       0x0009,
+       0x000A,
+       0x000B,
+       0x000C,
+       0x000D,
+       0x000E,
+       0x000F,
+       0x0010,
+       0x0011,
+       0x0012,
+       0x0013,
+       0x0014,
+       0x0015,
+       0x0016,
+       0x0017,
+       0x0018,
+       0x0019,
+       0x001A,
+       0x001B,
+       0x001C,
+       0x001D,
+       0x001E,
+       0x001F,
+       0x0020,
+       0x0021,
+       0x0022,
+       0x0023,
+       0x0024,
+       0x0025,
+       0x0026,
+       0x0027,
+       0x0028,
+       0x0029,
+       0x002A,
+       0x002B,
+       0x002C,
+       0x002D,
+       0x002E,
+       0x002F,
+       0x0030,
+       0x0031,
+       0x0032,
+       0x0033,
+       0x0034,
+       0x0035,
+       0x0036,
+       0x0037,
+       0x0038,
+       0x0039,
+       0x003A,
+       0x003B,
+       0x003C,
+       0x003D,
+       0x003E,
+       0x003F,
+       0x0040,
+       0x0061,
+       0x0062,
+       0x0063,
+       0x0064,
+       0x0065,
+       0x0066,
+       0x0067,
+       0x0068,
+       0x0069,
+       0x006A,
+       0x006B,
+       0x006C,
+       0x006D,
+       0x006E,
+       0x006F,
+       0x0070,
+       0x0071,
+       0x0072,
+       0x0073,
+       0x0074,
+       0x0075,
+       0x0076,
+       0x0077,
+       0x0078,
+       0x0079,
+       0x007A,
+       0x005B,
+       0x005C,
+       0x005D,
+       0x005E,
+       0x005F,
+       0x0060,
+       0x0041,
+       0x0042,
+       0x0043,
+       0x0044,
+       0x0045,
+       0x0046,
+       0x0047,
+       0x0048,
+       0x0049,
+       0x004A,
+       0x212A,
+       0x004C,
+       0x004D,
+       0x004E,
+       0x004F,
+       0x0050,
+       0x0051,
+       0x0052,
+       0x017F,
+       0x0054,
+       0x0055,
+       0x0056,
+       0x0057,
+       0x0058,
+       0x0059,
+       0x005A,
+       0x007B,
+       0x007C,
+       0x007D,
+       0x007E,
+       0x007F,
+}
+
 var caseOrbit = []foldPair{
        {0x004B, 0x006B},
        {0x0053, 0x0073},