// A field represents a single field found in a struct.
type field struct {
name string
+ nameBytes []byte // []byte(name)
+ equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
+
tag bool
index []int
typ reflect.Type
quoted bool
}
+func fillField(f field) field {
+ f.nameBytes = []byte(f.name)
+ f.equalFold = foldFunc(f.nameBytes)
+ return f
+}
+
// byName sorts field by name, breaking ties with depth,
// then breaking ties with "name came from json tag", then
// breaking ties with index sequence.
if name == "" {
name = sf.Name
}
- fields = append(fields, field{name, tagged, index, ft,
- opts.Contains("omitempty"), opts.Contains("string")})
+ fields = append(fields, fillField(field{
+ name: name,
+ tag: tagged,
+ index: index,
+ typ: ft,
+ omitEmpty: opts.Contains("omitempty"),
+ quoted: opts.Contains("string"),
+ }))
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
// Record new anonymous struct to explore in next round.
nextCount[ft]++
if nextCount[ft] == 1 {
- next = append(next, field{name: ft.Name(), index: index, typ: ft})
+ next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
}
}
}
--- /dev/null
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "bytes"
+ "unicode/utf8"
+)
+
+const (
+ caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
+ kelvin = '\u212a'
+ smallLongEss = '\u017f'
+)
+
+// foldFunc returns one of four different case folding equivalence
+// functions, from most general (and slow) to fastest:
+//
+// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
+// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
+// 3) asciiEqualFold, no special, but includes non-letters (including _)
+// 4) simpleLetterEqualFold, no specials, no non-letters.
+//
+// The letters S and K are special because they map to 3 runes, not just 2:
+// * S maps to s and to U+017F 'ſ' Latin small letter long s
+// * k maps to K and to U+212A 'K' Kelvin sign
+// See http://play.golang.org/p/tTxjOc0OGo
+//
+// The returned function is specialized for matching against s and
+// should only be given s. It's not curried for performance reasons.
+func foldFunc(s []byte) func(s, t []byte) bool {
+ nonLetter := false
+ special := false // special letter
+ for _, b := range s {
+ if b >= utf8.RuneSelf {
+ return bytes.EqualFold
+ }
+ upper := b & caseMask
+ if upper < 'A' || upper > 'Z' {
+ nonLetter = true
+ } else if upper == 'K' || upper == 'S' {
+ // See above for why these letters are special.
+ special = true
+ }
+ }
+ if special {
+ return equalFoldRight
+ }
+ if nonLetter {
+ return asciiEqualFold
+ }
+ return simpleLetterEqualFold
+}
+
+// equalFoldRight is a specialization of bytes.EqualFold when s is
+// known to be all ASCII (including punctuation), but contains an 's',
+// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
+// See comments on foldFunc.
+func equalFoldRight(s, t []byte) bool {
+ for _, sb := range s {
+ if len(t) == 0 {
+ return false
+ }
+ tb := t[0]
+ if tb < utf8.RuneSelf {
+ if sb != tb {
+ sbUpper := sb & caseMask
+ if 'A' <= sbUpper && sbUpper <= 'Z' {
+ if sbUpper != tb&caseMask {
+ return false
+ }
+ } else {
+ return false
+ }
+ }
+ t = t[1:]
+ continue
+ }
+ // sb is ASCII and t is not. t must be either kelvin
+ // sign or long s; sb must be s, S, k, or K.
+ tr, size := utf8.DecodeRune(t)
+ switch sb {
+ case 's', 'S':
+ if tr != smallLongEss {
+ return false
+ }
+ case 'k', 'K':
+ if tr != kelvin {
+ return false
+ }
+ default:
+ return false
+ }
+ t = t[size:]
+
+ }
+ if len(t) > 0 {
+ return false
+ }
+ return true
+}
+
+// asciiEqualFold is a specialization of bytes.EqualFold for use when
+// s is all ASCII (but may contain non-letters) and contains no
+// special-folding letters.
+// See comments on foldFunc.
+func asciiEqualFold(s, t []byte) bool {
+ if len(s) != len(t) {
+ return false
+ }
+ for i, sb := range s {
+ tb := t[i]
+ if sb == tb {
+ continue
+ }
+ if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
+ if sb&caseMask != tb&caseMask {
+ return false
+ }
+ } else {
+ return false
+ }
+ }
+ return true
+}
+
+// simpleLetterEqualFold is a specialization of bytes.EqualFold for
+// use when s is all ASCII letters (no underscores, etc) and also
+// doesn't contain 'k', 'K', 's', or 'S'.
+// See comments on foldFunc.
+func simpleLetterEqualFold(s, t []byte) bool {
+ if len(s) != len(t) {
+ return false
+ }
+ for i, b := range s {
+ if b&caseMask != t[i]&caseMask {
+ return false
+ }
+ }
+ return true
+}
--- /dev/null
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+ "unicode/utf8"
+)
+
+var foldTests = []struct {
+ fn func(s, t []byte) bool
+ s, t string
+ want bool
+}{
+ {equalFoldRight, "", "", true},
+ {equalFoldRight, "a", "a", true},
+ {equalFoldRight, "", "a", false},
+ {equalFoldRight, "a", "", false},
+ {equalFoldRight, "a", "A", true},
+ {equalFoldRight, "AB", "ab", true},
+ {equalFoldRight, "AB", "ac", false},
+ {equalFoldRight, "sbkKc", "ſbKKc", true},
+ {equalFoldRight, "SbKkc", "ſbKKc", true},
+ {equalFoldRight, "SbKkc", "ſbKK", false},
+ {equalFoldRight, "e", "é", false},
+ {equalFoldRight, "s", "S", true},
+
+ {simpleLetterEqualFold, "", "", true},
+ {simpleLetterEqualFold, "abc", "abc", true},
+ {simpleLetterEqualFold, "abc", "ABC", true},
+ {simpleLetterEqualFold, "abc", "ABCD", false},
+ {simpleLetterEqualFold, "abc", "xxx", false},
+
+ {asciiEqualFold, "a_B", "A_b", true},
+ {asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent
+}
+
+func TestFold(t *testing.T) {
+ for i, tt := range foldTests {
+ if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want {
+ t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want)
+ }
+ truth := strings.EqualFold(tt.s, tt.t)
+ if truth != tt.want {
+ t.Errorf("strings.EqualFold doesn't agree with case %d", i)
+ }
+ }
+}
+
+func TestFoldAgainstUnicode(t *testing.T) {
+ const bufSize = 5
+ buf1 := make([]byte, 0, bufSize)
+ buf2 := make([]byte, 0, bufSize)
+ var runes []rune
+ for i := 0x20; i <= 0x7f; i++ {
+ runes = append(runes, rune(i))
+ }
+ runes = append(runes, kelvin, smallLongEss)
+
+ funcs := []struct {
+ name string
+ fold func(s, t []byte) bool
+ letter bool // must be ASCII letter
+ simple bool // must be simple ASCII letter (not 'S' or 'K')
+ }{
+ {
+ name: "equalFoldRight",
+ fold: equalFoldRight,
+ },
+ {
+ name: "asciiEqualFold",
+ fold: asciiEqualFold,
+ simple: true,
+ },
+ {
+ name: "simpleLetterEqualFold",
+ fold: simpleLetterEqualFold,
+ simple: true,
+ letter: true,
+ },
+ }
+
+ for _, ff := range funcs {
+ for _, r := range runes {
+ if r >= utf8.RuneSelf {
+ continue
+ }
+ if ff.letter && !isASCIILetter(byte(r)) {
+ continue
+ }
+ if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') {
+ continue
+ }
+ for _, r2 := range runes {
+ buf1 := append(buf1[:0], 'x')
+ buf2 := append(buf2[:0], 'x')
+ buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)]
+ buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)]
+ buf1 = append(buf1, 'x')
+ buf2 = append(buf2, 'x')
+ want := bytes.EqualFold(buf1, buf2)
+ if got := ff.fold(buf1, buf2); got != want {
+ t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want)
+ }
+ }
+ }
+ }
+}
+
+func isASCIILetter(b byte) bool {
+ return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z')
+}