Cypherpunks repositories - gostls13.git/commitdiff
encoding/json: speed up decoding
author: Brad Fitzpatrick <bradfitz@golang.org>
Wed, 18 Dec 2013 15:30:21 +0000 (07:30 -0800)
committer: Brad Fitzpatrick <bradfitz@golang.org>
Wed, 18 Dec 2013 15:30:21 +0000 (07:30 -0800)
Don't make copies of keys while decoding, and don't use the
expensive strings.EqualFold when it's not necessary. Instead,
note in the existing field cache what algorithm to use to
check fold equality... most keys are just ASCII letters.

benchmark               old ns/op    new ns/op    delta
BenchmarkCodeDecoder    137074314    103974418  -24.15%

benchmark                old MB/s     new MB/s  speedup
BenchmarkCodeDecoder        14.16        18.66    1.32x

Update #6496

R=golang-dev, rsc, adg, r, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/13894045

src/pkg/encoding/json/decode.go
src/pkg/encoding/json/encode.go
src/pkg/encoding/json/fold.go [new file with mode: 0644]
src/pkg/encoding/json/fold_test.go [new file with mode: 0644]

diff --git a/src/pkg/encoding/json/decode.go b/src/pkg/encoding/json/decode.go
index 458fb39ec0123e02bc589cebadd089d5b6a67341..4db566726e041ab84be1b08196822ac5f58f6e08 100644 (file)
@@ -8,6 +8,7 @@
 package json
 
 import (
+       "bytes"
        "encoding"
        "encoding/base64"
        "errors"
@@ -15,7 +16,6 @@ import (
        "reflect"
        "runtime"
        "strconv"
-       "strings"
        "unicode"
        "unicode/utf16"
        "unicode/utf8"
@@ -500,11 +500,11 @@ func (d *decodeState) object(v reflect.Value) {
                        d.error(errPhase)
                }
 
-               // Read string key.
+               // Read key.
                start := d.off - 1
                op = d.scanWhile(scanContinue)
                item := d.data[start : d.off-1]
-               key, ok := unquote(item)
+               key, ok := unquoteBytes(item)
                if !ok {
                        d.error(errPhase)
                }
@@ -526,11 +526,11 @@ func (d *decodeState) object(v reflect.Value) {
                        fields := cachedTypeFields(v.Type())
                        for i := range fields {
                                ff := &fields[i]
-                               if ff.name == key {
+                               if bytes.Equal(ff.nameBytes, key) {
                                        f = ff
                                        break
                                }
-                               if f == nil && strings.EqualFold(ff.name, key) {
+                               if f == nil && ff.equalFold(ff.nameBytes, key) {
                                        f = ff
                                }
                        }
diff --git a/src/pkg/encoding/json/encode.go b/src/pkg/encoding/json/encode.go
index 7d6c71d7a9016a276c3199b4054111005e1d20f4..8c71770ca4624dd60c96fb44121b8647e287d4fc 100644 (file)
@@ -936,6 +936,9 @@ func (e *encodeState) stringBytes(s []byte) (int, error) {
 // A field represents a single field found in a struct.
 type field struct {
        name      string
+       nameBytes []byte                 // []byte(name)
+       equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
+
        tag       bool
        index     []int
        typ       reflect.Type
@@ -943,6 +946,12 @@ type field struct {
        quoted    bool
 }
 
+func fillField(f field) field {
+       f.nameBytes = []byte(f.name)
+       f.equalFold = foldFunc(f.nameBytes)
+       return f
+}
+
 // byName sorts field by name, breaking ties with depth,
 // then breaking ties with "name came from json tag", then
 // breaking ties with index sequence.
@@ -1042,8 +1051,14 @@ func typeFields(t reflect.Type) []field {
                                        if name == "" {
                                                name = sf.Name
                                        }
-                                       fields = append(fields, field{name, tagged, index, ft,
-                                               opts.Contains("omitempty"), opts.Contains("string")})
+                                       fields = append(fields, fillField(field{
+                                               name:      name,
+                                               tag:       tagged,
+                                               index:     index,
+                                               typ:       ft,
+                                               omitEmpty: opts.Contains("omitempty"),
+                                               quoted:    opts.Contains("string"),
+                                       }))
                                        if count[f.typ] > 1 {
                                                // If there were multiple instances, add a second,
                                                // so that the annihilation code will see a duplicate.
@@ -1057,7 +1072,7 @@ func typeFields(t reflect.Type) []field {
                                // Record new anonymous struct to explore in next round.
                                nextCount[ft]++
                                if nextCount[ft] == 1 {
-                                       next = append(next, field{name: ft.Name(), index: index, typ: ft})
+                                       next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
                                }
                        }
                }
diff --git a/src/pkg/encoding/json/fold.go b/src/pkg/encoding/json/fold.go
new file mode 100644 (file)
index 0000000..d6f77c9
--- /dev/null
@@ -0,0 +1,143 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+       "bytes"
+       "unicode/utf8"
+)
+
+const (
+       caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
+       kelvin       = '\u212a'
+       smallLongEss = '\u017f'
+)
+
+// foldFunc returns one of four different case folding equivalence
+// functions, from most general (and slow) to fastest:
+//
+// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
+// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
+// 3) asciiEqualFold, no special, but includes non-letters (including _)
+// 4) simpleLetterEqualFold, no specials, no non-letters.
+//
+// The letters S and K are special because they map to 3 runes, not just 2:
+//  * S maps to s and to U+017F 'ſ' Latin small letter long s
+//  * k maps to K and to U+212A 'K' Kelvin sign
+// See http://play.golang.org/p/tTxjOc0OGo
+//
+// The returned function is specialized for matching against s and
+// should only be given s. It's not curried for performance reasons.
+func foldFunc(s []byte) func(s, t []byte) bool {
+       nonLetter := false
+       special := false // special letter
+       for _, b := range s {
+               if b >= utf8.RuneSelf {
+                       return bytes.EqualFold
+               }
+               upper := b & caseMask
+               if upper < 'A' || upper > 'Z' {
+                       nonLetter = true
+               } else if upper == 'K' || upper == 'S' {
+                       // See above for why these letters are special.
+                       special = true
+               }
+       }
+       if special {
+               return equalFoldRight
+       }
+       if nonLetter {
+               return asciiEqualFold
+       }
+       return simpleLetterEqualFold
+}
+
+// equalFoldRight is a specialization of bytes.EqualFold when s is
+// known to be all ASCII (including punctuation), but contains an 's',
+// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
+// See comments on foldFunc.
+func equalFoldRight(s, t []byte) bool {
+       for _, sb := range s {
+               if len(t) == 0 {
+                       return false
+               }
+               tb := t[0]
+               if tb < utf8.RuneSelf {
+                       if sb != tb {
+                               sbUpper := sb & caseMask
+                               if 'A' <= sbUpper && sbUpper <= 'Z' {
+                                       if sbUpper != tb&caseMask {
+                                               return false
+                                       }
+                               } else {
+                                       return false
+                               }
+                       }
+                       t = t[1:]
+                       continue
+               }
+               // sb is ASCII and t is not. t must be either kelvin
+               // sign or long s; sb must be s, S, k, or K.
+               tr, size := utf8.DecodeRune(t)
+               switch sb {
+               case 's', 'S':
+                       if tr != smallLongEss {
+                               return false
+                       }
+               case 'k', 'K':
+                       if tr != kelvin {
+                               return false
+                       }
+               default:
+                       return false
+               }
+               t = t[size:]
+
+       }
+       if len(t) > 0 {
+               return false
+       }
+       return true
+}
+
+// asciiEqualFold is a specialization of bytes.EqualFold for use when
+// s is all ASCII (but may contain non-letters) and contains no
+// special-folding letters.
+// See comments on foldFunc.
+func asciiEqualFold(s, t []byte) bool {
+       if len(s) != len(t) {
+               return false
+       }
+       for i, sb := range s {
+               tb := t[i]
+               if sb == tb {
+                       continue
+               }
+               if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
+                       if sb&caseMask != tb&caseMask {
+                               return false
+                       }
+               } else {
+                       return false
+               }
+       }
+       return true
+}
+
+// simpleLetterEqualFold is a specialization of bytes.EqualFold for
+// use when s is all ASCII letters (no underscores, etc) and also
+// doesn't contain 'k', 'K', 's', or 'S'.
+// See comments on foldFunc.
+func simpleLetterEqualFold(s, t []byte) bool {
+       if len(s) != len(t) {
+               return false
+       }
+       for i, b := range s {
+               if b&caseMask != t[i]&caseMask {
+                       return false
+               }
+       }
+       return true
+}
diff --git a/src/pkg/encoding/json/fold_test.go b/src/pkg/encoding/json/fold_test.go
new file mode 100644 (file)
index 0000000..9fb9464
--- /dev/null
@@ -0,0 +1,116 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+       "bytes"
+       "strings"
+       "testing"
+       "unicode/utf8"
+)
+
+var foldTests = []struct {
+       fn   func(s, t []byte) bool
+       s, t string
+       want bool
+}{
+       {equalFoldRight, "", "", true},
+       {equalFoldRight, "a", "a", true},
+       {equalFoldRight, "", "a", false},
+       {equalFoldRight, "a", "", false},
+       {equalFoldRight, "a", "A", true},
+       {equalFoldRight, "AB", "ab", true},
+       {equalFoldRight, "AB", "ac", false},
+       {equalFoldRight, "sbkKc", "ſbKKc", true},
+       {equalFoldRight, "SbKkc", "ſbKKc", true},
+       {equalFoldRight, "SbKkc", "ſbKK", false},
+       {equalFoldRight, "e", "é", false},
+       {equalFoldRight, "s", "S", true},
+
+       {simpleLetterEqualFold, "", "", true},
+       {simpleLetterEqualFold, "abc", "abc", true},
+       {simpleLetterEqualFold, "abc", "ABC", true},
+       {simpleLetterEqualFold, "abc", "ABCD", false},
+       {simpleLetterEqualFold, "abc", "xxx", false},
+
+       {asciiEqualFold, "a_B", "A_b", true},
+       {asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent
+}
+
+func TestFold(t *testing.T) {
+       for i, tt := range foldTests {
+               if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want {
+                       t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want)
+               }
+               truth := strings.EqualFold(tt.s, tt.t)
+               if truth != tt.want {
+                       t.Errorf("strings.EqualFold doesn't agree with case %d", i)
+               }
+       }
+}
+
+func TestFoldAgainstUnicode(t *testing.T) {
+       const bufSize = 5
+       buf1 := make([]byte, 0, bufSize)
+       buf2 := make([]byte, 0, bufSize)
+       var runes []rune
+       for i := 0x20; i <= 0x7f; i++ {
+               runes = append(runes, rune(i))
+       }
+       runes = append(runes, kelvin, smallLongEss)
+
+       funcs := []struct {
+               name   string
+               fold   func(s, t []byte) bool
+               letter bool // must be ASCII letter
+               simple bool // must be simple ASCII letter (not 'S' or 'K')
+       }{
+               {
+                       name: "equalFoldRight",
+                       fold: equalFoldRight,
+               },
+               {
+                       name:   "asciiEqualFold",
+                       fold:   asciiEqualFold,
+                       simple: true,
+               },
+               {
+                       name:   "simpleLetterEqualFold",
+                       fold:   simpleLetterEqualFold,
+                       simple: true,
+                       letter: true,
+               },
+       }
+
+       for _, ff := range funcs {
+               for _, r := range runes {
+                       if r >= utf8.RuneSelf {
+                               continue
+                       }
+                       if ff.letter && !isASCIILetter(byte(r)) {
+                               continue
+                       }
+                       if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') {
+                               continue
+                       }
+                       for _, r2 := range runes {
+                               buf1 := append(buf1[:0], 'x')
+                               buf2 := append(buf2[:0], 'x')
+                               buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)]
+                               buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)]
+                               buf1 = append(buf1, 'x')
+                               buf2 = append(buf2, 'x')
+                               want := bytes.EqualFold(buf1, buf2)
+                               if got := ff.fold(buf1, buf2); got != want {
+                                       t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want)
+                               }
+                       }
+               }
+       }
+}
+
+func isASCIILetter(b byte) bool {
+       return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z')
+}