From: Brad Fitzpatrick Date: Wed, 18 Dec 2013 15:30:21 +0000 (-0800) Subject: encoding/json: speed up decoding X-Git-Tag: go1.3beta1~1180 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=626da8d73741b0cdeaa1acc048fec9ec8286f2b5;p=gostls13.git encoding/json: speed up decoding Don't make copies of keys while decoding, and don't use the expensive strings.EqualFold when it's not necessary. Instead, note in the existing field cache what algorithm to use to check fold equality... most keys are just ASCII letters. benchmark old ns/op new ns/op delta BenchmarkCodeDecoder 137074314 103974418 -24.15% benchmark old MB/s new MB/s speedup BenchmarkCodeDecoder 14.16 18.66 1.32x Update #6496 R=golang-dev, rsc, adg, r, mikioh.mikioh CC=golang-dev https://golang.org/cl/13894045 --- diff --git a/src/pkg/encoding/json/decode.go b/src/pkg/encoding/json/decode.go index 458fb39ec0..4db566726e 100644 --- a/src/pkg/encoding/json/decode.go +++ b/src/pkg/encoding/json/decode.go @@ -8,6 +8,7 @@ package json import ( + "bytes" "encoding" "encoding/base64" "errors" @@ -15,7 +16,6 @@ import ( "reflect" "runtime" "strconv" - "strings" "unicode" "unicode/utf16" "unicode/utf8" @@ -500,11 +500,11 @@ func (d *decodeState) object(v reflect.Value) { d.error(errPhase) } - // Read string key. + // Read key. start := d.off - 1 op = d.scanWhile(scanContinue) item := d.data[start : d.off-1] - key, ok := unquote(item) + key, ok := unquoteBytes(item) if !ok { d.error(errPhase) } @@ -526,11 +526,11 @@ func (d *decodeState) object(v reflect.Value) { fields := cachedTypeFields(v.Type()) for i := range fields { ff := &fields[i] - if ff.name == key { + if bytes.Equal(ff.nameBytes, key) { f = ff break } - if f == nil && strings.EqualFold(ff.name, key) { + if f == nil && ff.equalFold(ff.nameBytes, key) { f = ff } } diff --git a/src/pkg/encoding/json/encode.go b/src/pkg/encoding/json/encode.go index 7d6c71d7a9..8c71770ca4 100644 --- a/src/pkg/encoding/json/encode.go +++ b/src/pkg/encoding/json/encode.go @@ -936,6 +936,9 @@ func (e *encodeState) stringBytes(s []byte) (int, error) { // A field represents a single field found in a struct. type field struct { name string + nameBytes []byte // []byte(name) + equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent + tag bool index []int typ reflect.Type @@ -943,6 +946,12 @@ type field struct { quoted bool } +func fillField(f field) field { + f.nameBytes = []byte(f.name) + f.equalFold = foldFunc(f.nameBytes) + return f +} + // byName sorts field by name, breaking ties with depth, // then breaking ties with "name came from json tag", then // breaking ties with index sequence. @@ -1042,8 +1051,14 @@ func typeFields(t reflect.Type) []field { if name == "" { name = sf.Name } - fields = append(fields, field{name, tagged, index, ft, - opts.Contains("omitempty"), opts.Contains("string")}) + fields = append(fields, fillField(field{ + name: name, + tag: tagged, + index: index, + typ: ft, + omitEmpty: opts.Contains("omitempty"), + quoted: opts.Contains("string"), + })) if count[f.typ] > 1 { // If there were multiple instances, add a second, // so that the annihilation code will see a duplicate. @@ -1057,7 +1072,7 @@ func typeFields(t reflect.Type) []field { // Record new anonymous struct to explore in next round. nextCount[ft]++ if nextCount[ft] == 1 { - next = append(next, field{name: ft.Name(), index: index, typ: ft}) + next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft})) } } } diff --git a/src/pkg/encoding/json/fold.go b/src/pkg/encoding/json/fold.go new file mode 100644 index 0000000000..d6f77c93e5 --- /dev/null +++ b/src/pkg/encoding/json/fold.go @@ -0,0 +1,143 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "unicode/utf8" +) + +const ( + caseMask = ^byte(0x20) // Mask to ignore case in ASCII. + kelvin = '\u212a' + smallLongEss = '\u017f' +) + +// foldFunc returns one of four different case folding equivalence +// functions, from most general (and slow) to fastest: +// +// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8 +// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S') +// 3) asciiEqualFold, no special, but includes non-letters (including _) +// 4) simpleLetterEqualFold, no specials, no non-letters. +// +// The letters S and K are special because they map to 3 runes, not just 2: +// * S maps to s and to U+017F 'ſ' Latin small letter long s +// * k maps to K and to U+212A 'K' Kelvin sign +// See http://play.golang.org/p/tTxjOc0OGo +// +// The returned function is specialized for matching against s and +// should only be given s. It's not curried for performance reasons. +func foldFunc(s []byte) func(s, t []byte) bool { + nonLetter := false + special := false // special letter + for _, b := range s { + if b >= utf8.RuneSelf { + return bytes.EqualFold + } + upper := b & caseMask + if upper < 'A' || upper > 'Z' { + nonLetter = true + } else if upper == 'K' || upper == 'S' { + // See above for why these letters are special. + special = true + } + } + if special { + return equalFoldRight + } + if nonLetter { + return asciiEqualFold + } + return simpleLetterEqualFold +} + +// equalFoldRight is a specialization of bytes.EqualFold when s is +// known to be all ASCII (including punctuation), but contains an 's', +// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t. +// See comments on foldFunc. +func equalFoldRight(s, t []byte) bool { + for _, sb := range s { + if len(t) == 0 { + return false + } + tb := t[0] + if tb < utf8.RuneSelf { + if sb != tb { + sbUpper := sb & caseMask + if 'A' <= sbUpper && sbUpper <= 'Z' { + if sbUpper != tb&caseMask { + return false + } + } else { + return false + } + } + t = t[1:] + continue + } + // sb is ASCII and t is not. t must be either kelvin + // sign or long s; sb must be s, S, k, or K. + tr, size := utf8.DecodeRune(t) + switch sb { + case 's', 'S': + if tr != smallLongEss { + return false + } + case 'k', 'K': + if tr != kelvin { + return false + } + default: + return false + } + t = t[size:] + + } + if len(t) > 0 { + return false + } + return true +} + +// asciiEqualFold is a specialization of bytes.EqualFold for use when +// s is all ASCII (but may contain non-letters) and contains no +// special-folding letters. +// See comments on foldFunc. +func asciiEqualFold(s, t []byte) bool { + if len(s) != len(t) { + return false + } + for i, sb := range s { + tb := t[i] + if sb == tb { + continue + } + if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') { + if sb&caseMask != tb&caseMask { + return false + } + } else { + return false + } + } + return true +} + +// simpleLetterEqualFold is a specialization of bytes.EqualFold for +// use when s is all ASCII letters (no underscores, etc) and also +// doesn't contain 'k', 'K', 's', or 'S'. +// See comments on foldFunc. +func simpleLetterEqualFold(s, t []byte) bool { + if len(s) != len(t) { + return false + } + for i, b := range s { + if b&caseMask != t[i]&caseMask { + return false + } + } + return true +} diff --git a/src/pkg/encoding/json/fold_test.go b/src/pkg/encoding/json/fold_test.go new file mode 100644 index 0000000000..9fb94646a8 --- /dev/null +++ b/src/pkg/encoding/json/fold_test.go @@ -0,0 +1,116 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "bytes" + "strings" + "testing" + "unicode/utf8" +) + +var foldTests = []struct { + fn func(s, t []byte) bool + s, t string + want bool +}{ + {equalFoldRight, "", "", true}, + {equalFoldRight, "a", "a", true}, + {equalFoldRight, "", "a", false}, + {equalFoldRight, "a", "", false}, + {equalFoldRight, "a", "A", true}, + {equalFoldRight, "AB", "ab", true}, + {equalFoldRight, "AB", "ac", false}, + {equalFoldRight, "sbkKc", "ſbKKc", true}, + {equalFoldRight, "SbKkc", "ſbKKc", true}, + {equalFoldRight, "SbKkc", "ſbKK", false}, + {equalFoldRight, "e", "é", false}, + {equalFoldRight, "s", "S", true}, + + {simpleLetterEqualFold, "", "", true}, + {simpleLetterEqualFold, "abc", "abc", true}, + {simpleLetterEqualFold, "abc", "ABC", true}, + {simpleLetterEqualFold, "abc", "ABCD", false}, + {simpleLetterEqualFold, "abc", "xxx", false}, + + {asciiEqualFold, "a_B", "A_b", true}, + {asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent +} + +func TestFold(t *testing.T) { + for i, tt := range foldTests { + if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want { + t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want) + } + truth := strings.EqualFold(tt.s, tt.t) + if truth != tt.want { + t.Errorf("strings.EqualFold doesn't agree with case %d", i) + } + } +} + +func TestFoldAgainstUnicode(t *testing.T) { + const bufSize = 5 + buf1 := make([]byte, 0, bufSize) + buf2 := make([]byte, 0, bufSize) + var runes []rune + for i := 0x20; i <= 0x7f; i++ { + runes = append(runes, rune(i)) + } + runes = append(runes, kelvin, smallLongEss) + + funcs := []struct { + name string + fold func(s, t []byte) bool + letter bool // must be ASCII letter + simple bool // must be simple ASCII letter (not 'S' or 'K') + }{ + { + name: "equalFoldRight", + fold: equalFoldRight, + }, + { + name: "asciiEqualFold", + fold: asciiEqualFold, + simple: true, + }, + { + name: "simpleLetterEqualFold", + fold: simpleLetterEqualFold, + simple: true, + letter: true, + }, + } + + for _, ff := range funcs { + for _, r := range runes { + if r >= utf8.RuneSelf { + continue + } + if ff.letter && !isASCIILetter(byte(r)) { + continue + } + if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') { + continue + } + for _, r2 := range runes { + buf1 := append(buf1[:0], 'x') + buf2 := append(buf2[:0], 'x') + buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)] + buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)] + buf1 = append(buf1, 'x') + buf2 = append(buf2, 'x') + want := bytes.EqualFold(buf1, buf2) + if got := ff.fold(buf1, buf2); got != want { + t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want) + } + } + } + } +} + +func isASCIILetter(b byte) bool { + return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z') +}