From: Brad Fitzpatrick <bradfitz@golang.org>
Date: Wed, 18 Dec 2013 15:30:21 +0000 (-0800)
Subject: encoding/json: speed up decoding
X-Git-Tag: go1.3beta1~1180
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=626da8d73741b0cdeaa1acc048fec9ec8286f2b5;p=gostls13.git

encoding/json: speed up decoding

Don't make copies of keys while decoding, and don't use the
expensive strings.EqualFold when it's not necessary. Instead,
note in the existing field cache what algorithm to use to
check fold equality... most keys are just ASCII letters.

benchmark               old ns/op    new ns/op    delta
BenchmarkCodeDecoder    137074314    103974418  -24.15%

benchmark                old MB/s     new MB/s  speedup
BenchmarkCodeDecoder        14.16        18.66    1.32x

Update #6496

R=golang-dev, rsc, adg, r, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/13894045
---

diff --git a/src/pkg/encoding/json/decode.go b/src/pkg/encoding/json/decode.go
index 458fb39ec0..4db566726e 100644
--- a/src/pkg/encoding/json/decode.go
+++ b/src/pkg/encoding/json/decode.go
@@ -8,6 +8,7 @@
 package json
 
 import (
+	"bytes"
 	"encoding"
 	"encoding/base64"
 	"errors"
@@ -15,7 +16,6 @@ import (
 	"reflect"
 	"runtime"
 	"strconv"
-	"strings"
 	"unicode"
 	"unicode/utf16"
 	"unicode/utf8"
@@ -500,11 +500,11 @@ func (d *decodeState) object(v reflect.Value) {
 			d.error(errPhase)
 		}
 
-		// Read string key.
+		// Read key.
 		start := d.off - 1
 		op = d.scanWhile(scanContinue)
 		item := d.data[start : d.off-1]
-		key, ok := unquote(item)
+		key, ok := unquoteBytes(item)
 		if !ok {
 			d.error(errPhase)
 		}
@@ -526,11 +526,11 @@ func (d *decodeState) object(v reflect.Value) {
 			fields := cachedTypeFields(v.Type())
 			for i := range fields {
 				ff := &fields[i]
-				if ff.name == key {
+				if bytes.Equal(ff.nameBytes, key) {
 					f = ff
 					break
 				}
-				if f == nil && strings.EqualFold(ff.name, key) {
+				if f == nil && ff.equalFold(ff.nameBytes, key) {
 					f = ff
 				}
 			}
diff --git a/src/pkg/encoding/json/encode.go b/src/pkg/encoding/json/encode.go
index 7d6c71d7a9..8c71770ca4 100644
--- a/src/pkg/encoding/json/encode.go
+++ b/src/pkg/encoding/json/encode.go
@@ -936,6 +936,9 @@ func (e *encodeState) stringBytes(s []byte) (int, error) {
 // A field represents a single field found in a struct.
 type field struct {
 	name      string
+	nameBytes []byte                 // []byte(name)
+	equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
+
 	tag       bool
 	index     []int
 	typ       reflect.Type
@@ -943,6 +946,12 @@ type field struct {
 	quoted    bool
 }
 
+func fillField(f field) field { // f is received by value; the filled-in copy is returned
+	f.nameBytes = []byte(f.name)        // byte form of the name, built once here
+	f.equalFold = foldFunc(f.nameBytes) // fold comparer selected by foldFunc for this name
+	return f
+}
+
 // byName sorts field by name, breaking ties with depth,
 // then breaking ties with "name came from json tag", then
 // breaking ties with index sequence.
@@ -1042,8 +1051,14 @@ func typeFields(t reflect.Type) []field {
 					if name == "" {
 						name = sf.Name
 					}
-					fields = append(fields, field{name, tagged, index, ft,
-						opts.Contains("omitempty"), opts.Contains("string")})
+					fields = append(fields, fillField(field{
+						name:      name,
+						tag:       tagged,
+						index:     index,
+						typ:       ft,
+						omitEmpty: opts.Contains("omitempty"),
+						quoted:    opts.Contains("string"),
+					}))
 					if count[f.typ] > 1 {
 						// If there were multiple instances, add a second,
 						// so that the annihilation code will see a duplicate.
@@ -1057,7 +1072,7 @@ func typeFields(t reflect.Type) []field {
 				// Record new anonymous struct to explore in next round.
 				nextCount[ft]++
 				if nextCount[ft] == 1 {
-					next = append(next, field{name: ft.Name(), index: index, typ: ft})
+					next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
 				}
 			}
 		}
diff --git a/src/pkg/encoding/json/fold.go b/src/pkg/encoding/json/fold.go
new file mode 100644
index 0000000000..d6f77c93e5
--- /dev/null
+++ b/src/pkg/encoding/json/fold.go
@@ -0,0 +1,143 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"bytes"
+	"unicode/utf8"
+)
+
+const (
+	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII: clears bit 0x20.
+	kelvin       = '\u212a'    // U+212A Kelvin sign; accepted as a fold of 'k' and 'K'.
+	smallLongEss = '\u017f'    // U+017F Latin small letter long s; accepted as a fold of 's' and 'S'.
+)
+
+// foldFunc returns one of four different case folding equivalence
+// functions, from most general (and slow) to fastest:
+//
+// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
+// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
+// 3) asciiEqualFold, no special, but includes non-letters (including _)
+// 4) simpleLetterEqualFold, no specials, no non-letters.
+//
+// The letters S and K are special because they map to 3 runes, not just 2:
+//  * S maps to s and to U+017F 'ſ' Latin small letter long s
+//  * k maps to K and to U+212A 'K' Kelvin sign
+// See http://play.golang.org/p/tTxjOc0OGo
+//
+// The returned function is specialized for matching against s and
+// should only be given s. It's not curried for performance reasons.
+func foldFunc(s []byte) func(s, t []byte) bool {
+	nonLetter := false
+	special := false // special letter
+	for _, b := range s {
+		if b >= utf8.RuneSelf {
+			return bytes.EqualFold // any non-ASCII byte selects the general comparer immediately
+		}
+		upper := b & caseMask // clear the case bit so 'a'-'z' land on 'A'-'Z'
+		if upper < 'A' || upper > 'Z' {
+			nonLetter = true
+		} else if upper == 'K' || upper == 'S' {
+			// See above for why these letters are special.
+			special = true
+		}
+	}
+	if special { // tested before nonLetter, so a special letter takes precedence
+		return equalFoldRight
+	}
+	if nonLetter {
+		return asciiEqualFold
+	}
+	return simpleLetterEqualFold
+}
+
+// equalFoldRight is a specialization of bytes.EqualFold when s is
+// known to be all ASCII (including punctuation), but contains an 's',
+// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
+// See comments on foldFunc.
+func equalFoldRight(s, t []byte) bool {
+	for _, sb := range s {
+		if len(t) == 0 {
+			return false // t ran out before s did
+		}
+		tb := t[0]
+		if tb < utf8.RuneSelf {
+			if sb != tb {
+				sbUpper := sb & caseMask
+				if 'A' <= sbUpper && sbUpper <= 'Z' {
+					if sbUpper != tb&caseMask {
+						return false
+					}
+				} else {
+					return false // a non-letter in s matches only the identical byte
+				}
+			}
+			t = t[1:] // consume the single ASCII byte just compared
+			continue
+		}
+		// sb is ASCII and t is not. t must be either kelvin
+		// sign or long s; sb must be s, S, k, or K.
+		tr, size := utf8.DecodeRune(t)
+		switch sb {
+		case 's', 'S':
+			if tr != smallLongEss {
+				return false
+			}
+		case 'k', 'K':
+			if tr != kelvin {
+				return false
+			}
+		default:
+			return false
+		}
+		t = t[size:] // consume every byte of the decoded rune
+
+	}
+	if len(t) > 0 {
+		return false // t still has input after s was exhausted
+	}
+	return true
+}
+
+// asciiEqualFold is a specialization of bytes.EqualFold for use when
+// s is all ASCII (but may contain non-letters) and contains no
+// special-folding letters.
+// See comments on foldFunc.
+func asciiEqualFold(s, t []byte) bool {
+	if len(s) != len(t) { // bytes are compared pairwise below, so lengths must agree
+		return false
+	}
+	for i, sb := range s {
+		tb := t[i]
+		if sb == tb {
+			continue
+		}
+		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
+			if sb&caseMask != tb&caseMask { // a letter may differ from tb only in the case bit
+				return false
+			}
+		} else {
+			return false // a non-letter in s matches only the identical byte
+		}
+	}
+	return true
+}
+
+// simpleLetterEqualFold is a specialization of bytes.EqualFold for
+// use when s is all ASCII letters (no underscores, etc) and also
+// doesn't contain 'k', 'K', 's', or 'S'.
+// See comments on foldFunc.
+func simpleLetterEqualFold(s, t []byte) bool {
+	if len(s) != len(t) { // bytes are compared pairwise below, so lengths must agree
+		return false
+	}
+	for i, b := range s {
+		if b&caseMask != t[i]&caseMask { // compare both bytes with the case bit cleared
+			return false
+		}
+	}
+	return true
+}
diff --git a/src/pkg/encoding/json/fold_test.go b/src/pkg/encoding/json/fold_test.go
new file mode 100644
index 0000000000..9fb94646a8
--- /dev/null
+++ b/src/pkg/encoding/json/fold_test.go
@@ -0,0 +1,116 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+var foldTests = []struct {
+	fn   func(s, t []byte) bool // fold comparer under test
+	s, t string                 // inputs, passed to fn as []byte(s) and []byte(t)
+	want bool                   // expected result of fn(s, t)
+}{
+	{equalFoldRight, "", "", true},
+	{equalFoldRight, "a", "a", true},
+	{equalFoldRight, "", "a", false},
+	{equalFoldRight, "a", "", false},
+	{equalFoldRight, "a", "A", true},
+	{equalFoldRight, "AB", "ab", true},
+	{equalFoldRight, "AB", "ac", false},
+	{equalFoldRight, "sbkKc", "ſbKKc", true}, // t holds U+017F (long s) and U+212A (Kelvin sign) followed by ASCII K
+	{equalFoldRight, "SbKkc", "ſbKKc", true},
+	{equalFoldRight, "SbKkc", "ſbKK", false},
+	{equalFoldRight, "e", "é", false},
+	{equalFoldRight, "s", "S", true},
+
+	{simpleLetterEqualFold, "", "", true},
+	{simpleLetterEqualFold, "abc", "abc", true},
+	{simpleLetterEqualFold, "abc", "ABC", true},
+	{simpleLetterEqualFold, "abc", "ABCD", false},
+	{simpleLetterEqualFold, "abc", "xxx", false},
+
+	{asciiEqualFold, "a_B", "A_b", true},
+	{asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent
+}
+
+func TestFold(t *testing.T) {
+	for i, tt := range foldTests { // each entry names the comparer it exercises
+		if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want {
+			t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want)
+		}
+		truth := strings.EqualFold(tt.s, tt.t) // sanity-check the table's want value against the standard library
+		if truth != tt.want {
+			t.Errorf("strings.EqualFold doesn't agree with case %d", i)
+		}
+	}
+}
+
+func TestFoldAgainstUnicode(t *testing.T) {
+	const bufSize = 5 // 'x' + one rune (at most 3 bytes among those tested) + 'x'
+	buf1 := make([]byte, 0, bufSize)
+	buf2 := make([]byte, 0, bufSize)
+	var runes []rune
+	for i := 0x20; i <= 0x7f; i++ { // ASCII from space through 0x7f
+		runes = append(runes, rune(i))
+	}
+	runes = append(runes, kelvin, smallLongEss)
+
+	funcs := []struct {
+		name   string
+		fold   func(s, t []byte) bool
+		letter bool // must be ASCII letter
+		simple bool // must be simple ASCII letter (not 'S' or 'K')
+	}{
+		{
+			name: "equalFoldRight",
+			fold: equalFoldRight,
+		},
+		{
+			name:   "asciiEqualFold",
+			fold:   asciiEqualFold,
+			simple: true,
+		},
+		{
+			name:   "simpleLetterEqualFold",
+			fold:   simpleLetterEqualFold,
+			simple: true,
+			letter: true,
+		},
+	}
+
+	for _, ff := range funcs {
+		for _, r := range runes {
+			if r >= utf8.RuneSelf { // the first argument stays ASCII; kelvin and long s appear only as r2
+				continue
+			}
+			if ff.letter && !isASCIILetter(byte(r)) {
+				continue
+			}
+			if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') {
+				continue
+			}
+			for _, r2 := range runes {
+				buf1 := append(buf1[:0], 'x') // shadows the outer buf1 while reusing its backing array
+				buf2 := append(buf2[:0], 'x')
+				buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)]
+				buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)]
+				buf1 = append(buf1, 'x')
+				buf2 = append(buf2, 'x')
+				want := bytes.EqualFold(buf1, buf2) // bytes.EqualFold supplies the reference answer
+				if got := ff.fold(buf1, buf2); got != want {
+					t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want)
+				}
+			}
+		}
+	}
+}
+
+func isASCIILetter(b byte) bool { // reports whether b is in 'A'-'Z' or 'a'-'z'
+	return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z')
+}