From 5d11838654464c42de48958ff163360da38ab850 Mon Sep 17 00:00:00 2001 From: "Pascal S. de Kloe" Date: Thu, 29 Jun 2017 11:51:22 +0200 Subject: [PATCH] encoding/json: read ahead after value consumption MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Eliminates the need for an extra scanner, read undo and some other tricks. name old time/op new time/op delta CodeEncoder-12 1.92ms ± 0% 1.91ms ± 1% -0.65% (p=0.000 n=17+20) CodeMarshal-12 2.13ms ± 2% 2.12ms ± 1% -0.49% (p=0.038 n=18+17) CodeDecoder-12 8.55ms ± 2% 8.49ms ± 1% ~ (p=0.119 n=20+18) UnicodeDecoder-12 411ns ± 0% 422ns ± 0% +2.77% (p=0.000 n=19+15) DecoderStream-12 320ns ± 1% 307ns ± 1% -3.80% (p=0.000 n=18+20) CodeUnmarshal-12 9.65ms ± 3% 9.58ms ± 3% ~ (p=0.157 n=20+20) CodeUnmarshalReuse-12 8.54ms ± 3% 8.56ms ± 2% ~ (p=0.602 n=20+20) UnmarshalString-12 110ns ± 1% 87ns ± 2% -21.53% (p=0.000 n=16+20) UnmarshalFloat64-12 101ns ± 1% 77ns ± 2% -23.08% (p=0.000 n=19+20) UnmarshalInt64-12 94.5ns ± 2% 68.4ns ± 1% -27.60% (p=0.000 n=20+20) Issue10335-12 128ns ± 1% 100ns ± 1% -21.89% (p=0.000 n=19+18) Unmapped-12 427ns ± 3% 247ns ± 4% -42.17% (p=0.000 n=20+20) NumberIsValid-12 23.0ns ± 0% 21.7ns ± 0% -5.73% (p=0.000 n=20+20) NumberIsValidRegexp-12 641ns ± 0% 642ns ± 0% +0.15% (p=0.003 n=19+19) EncoderEncode-12 56.9ns ± 0% 55.0ns ± 1% -3.32% (p=0.012 n=2+17) name old speed new speed delta CodeEncoder-12 1.01GB/s ± 1% 1.02GB/s ± 1% +0.71% (p=0.000 n=18+20) CodeMarshal-12 913MB/s ± 2% 917MB/s ± 1% +0.49% (p=0.038 n=18+17) CodeDecoder-12 227MB/s ± 2% 229MB/s ± 1% ~ (p=0.110 n=20+18) UnicodeDecoder-12 34.1MB/s ± 0% 33.1MB/s ± 0% -2.73% (p=0.000 n=19+19) CodeUnmarshal-12 201MB/s ± 3% 203MB/s ± 3% ~ (p=0.151 n=20+20) name old alloc/op new alloc/op delta Issue10335-12 320B ± 0% 184B ± 0% -42.50% (p=0.000 n=20+20) Unmapped-12 568B ± 0% 216B ± 0% -61.97% (p=0.000 n=20+20) EncoderEncode-12 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta Issue10335-12 4.00 ± 0% 3.00 ± 0% -25.00% (p=0.000 n=20+20) Unmapped-12 18.0 ± 0% 4.0 ± 0% -77.78% (p=0.000 n=20+20) EncoderEncode-12 0.00 0.00 ~ (all equal) Fixes #17914 Updates #20693 Updates #10335 Change-Id: I0459a52febb8b79c9a2991e69ed2614cf8740429 Reviewed-on: https://go-review.googlesource.com/47152 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/encoding/json/decode.go | 330 ++++++++++++++---------------- src/encoding/json/scanner.go | 57 +----- src/encoding/json/scanner_test.go | 37 ---- 3 files changed, 157 insertions(+), 267 deletions(-) diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go index dca328062f..a096356b3d 100644 --- a/src/encoding/json/decode.go +++ b/src/encoding/json/decode.go @@ -189,6 +189,7 @@ func (d *decodeState) unmarshal(v interface{}) (err error) { } d.scan.reset() + d.scanWhile(scanSkipSpace) // We decode rv not rv.Elem because the Unmarshaler interface // test must be applied at the top level of the value. d.value(rv) @@ -274,9 +275,9 @@ func isValidNumber(s string) bool { // decodeState represents the state while decoding a JSON value. type decodeState struct { data []byte - off int // read offset in data + off int // next read offset in data + opcode int // last read result scan scanner - nextscan scanner // for calls to nextValue errorContext struct { // provides context for type errors Struct string Field string @@ -286,6 +287,11 @@ type decodeState struct { disallowUnknownFields bool } +// readIndex returns the position of the last byte read. +func (d *decodeState) readIndex() int { + return d.off - 1 +} + // errPhase is used for errors that should not happen unless // there is a bug in the JSON decoder or something is editing // the data slice while the decoder executes. @@ -326,94 +332,83 @@ func (d *decodeState) addErrorContext(err error) error { return err } -// next cuts off and returns the next full JSON value in d.data[d.off:]. -// The next value is known to be an object or array, not a literal. -func (d *decodeState) next() []byte { - c := d.data[d.off] - item, rest, err := nextValue(d.data[d.off:], &d.nextscan) - if err != nil { - d.error(err) +// skip scans to the end of what was started. +func (d *decodeState) skip() { + s, data, i := &d.scan, d.data, d.off + depth := len(s.parseState) + for { + op := s.step(s, data[i]) + i++ + if len(s.parseState) < depth { + d.off = i + d.opcode = op + return + } } - d.off = len(d.data) - len(rest) +} - // Our scanner has seen the opening brace/bracket - // and thinks we're still in the middle of the object. - // invent a closing brace/bracket to get it out. - if c == '{' { - d.scan.step(&d.scan, '}') +// scanNext processes the byte at d.data[d.off]. +func (d *decodeState) scanNext() { + s, data, i := &d.scan, d.data, d.off + if i < len(data) { + d.opcode = s.step(s, data[i]) + d.off = i + 1 } else { - d.scan.step(&d.scan, ']') + d.opcode = s.eof() + d.off = len(data) + 1 // mark processed EOF with len+1 } - - return item } // scanWhile processes bytes in d.data[d.off:] until it // receives a scan code not equal to op. -// It updates d.off and returns the new scan code. -func (d *decodeState) scanWhile(op int) int { - var newOp int - for { - if d.off >= len(d.data) { - newOp = d.scan.eof() - d.off = len(d.data) + 1 // mark processed EOF with len+1 - } else { - c := d.data[d.off] - d.off++ - newOp = d.scan.step(&d.scan, c) - } +func (d *decodeState) scanWhile(op int) { + s, data, i := &d.scan, d.data, d.off + for i < len(d.data) { + newOp := s.step(s, data[i]) + i++ if newOp != op { - break + d.opcode = newOp + d.off = i + return } } - return newOp + + d.off = len(d.data) + 1 // mark processed EOF with len+1 + d.opcode = d.scan.eof() } -// value decodes a JSON value from d.data[d.off:] into the value. -// it updates d.off to point past the decoded value. +// value consumes a JSON value from d.data[d.off-1:], decoding into v, and +// reads the following byte ahead. If v is invalid, the value is discarded. +// The first byte of the value has been read already. func (d *decodeState) value(v reflect.Value) { - if !v.IsValid() { - _, rest, err := nextValue(d.data[d.off:], &d.nextscan) - if err != nil { - d.error(err) - } - d.off = len(d.data) - len(rest) - - // d.scan thinks we're still at the beginning of the item. - // Feed in an empty string - the shortest, simplest value - - // so that it knows we got to the end of the value. - if d.scan.redo { - // rewind. - d.scan.redo = false - d.scan.step = stateBeginValue - } - d.scan.step(&d.scan, '"') - d.scan.step(&d.scan, '"') - - n := len(d.scan.parseState) - if n > 0 && d.scan.parseState[n-1] == parseObjectKey { - // d.scan thinks we just read an object key; finish the object - d.scan.step(&d.scan, ':') - d.scan.step(&d.scan, '"') - d.scan.step(&d.scan, '"') - d.scan.step(&d.scan, '}') - } - - return - } - - switch op := d.scanWhile(scanSkipSpace); op { + switch d.opcode { default: d.error(errPhase) case scanBeginArray: - d.array(v) + if v.IsValid() { + d.array(v) + } else { + d.skip() + } + d.scanNext() case scanBeginObject: - d.object(v) + if v.IsValid() { + d.object(v) + } else { + d.skip() + } + d.scanNext() case scanBeginLiteral: - d.literal(v) + // All bytes inside literal return scanContinue op code. + start := d.readIndex() + d.scanWhile(scanContinue) + + if v.IsValid() { + d.literalStore(d.data[start:d.readIndex()], v, false) + } } } @@ -424,15 +419,17 @@ type unquotedValue struct{} // If it finds anything other than a quoted string literal or null, // valueQuoted returns unquotedValue{}. func (d *decodeState) valueQuoted() interface{} { - switch op := d.scanWhile(scanSkipSpace); op { + switch d.opcode { default: d.error(errPhase) case scanBeginArray: - d.array(reflect.Value{}) + d.skip() + d.scanNext() case scanBeginObject: - d.object(reflect.Value{}) + d.skip() + d.scanNext() case scanBeginLiteral: switch v := d.literalInterface().(type) { @@ -447,7 +444,7 @@ func (d *decodeState) valueQuoted() interface{} { // until it gets to a non-pointer. // if it encounters an Unmarshaler, indirect stops and returns that. // if decodingNull is true, indirect stops at the last pointer so it can be set to nil. -func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { +func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { // Issue #24153 indicates that it is generally not a guaranteed property // that you may round-trip a reflect.Value by calling Value.Addr().Elem() // and expect the value to still be settable for values derived from @@ -512,14 +509,15 @@ func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler, return nil, nil, v } -// array consumes an array from d.data[d.off-1:], decoding into the value v. -// the first byte of the array ('[') has been read already. +// array consumes an array from d.data[d.off-1:], decoding into v. +// The first byte of the array ('[') has been read already. func (d *decodeState) array(v reflect.Value) { // Check for unmarshaler. - u, ut, pv := d.indirect(v, false) + u, ut, pv := indirect(v, false) if u != nil { - d.off-- - err := u.UnmarshalJSON(d.next()) + start := d.readIndex() + d.skip() + err := u.UnmarshalJSON(d.data[start:d.off]) if err != nil { d.error(err) } @@ -527,11 +525,9 @@ func (d *decodeState) array(v reflect.Value) { } if ut != nil { d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) - d.off-- - d.next() + d.skip() return } - v = pv // Check type of target. @@ -546,8 +542,7 @@ func (d *decodeState) array(v reflect.Value) { fallthrough default: d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) - d.off-- - d.next() + d.skip() return case reflect.Array: case reflect.Slice: @@ -557,15 +552,11 @@ func (d *decodeState) array(v reflect.Value) { i := 0 for { // Look ahead for ] - can only happen on first iteration. - op := d.scanWhile(scanSkipSpace) - if op == scanEndArray { + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { break } - // Back up so d.value can have the byte we just read. - d.off-- - d.scan.undo(op) - // Get element of array, growing if necessary. if v.Kind() == reflect.Slice { // Grow slice if necessary @@ -593,11 +584,13 @@ func (d *decodeState) array(v reflect.Value) { i++ // Next token must be , or ]. - op = d.scanWhile(scanSkipSpace) - if op == scanEndArray { + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { break } - if op != scanArrayValue { + if d.opcode != scanArrayValue { d.error(errPhase) } } @@ -621,14 +614,15 @@ func (d *decodeState) array(v reflect.Value) { var nullLiteral = []byte("null") var textUnmarshalerType = reflect.TypeOf(new(encoding.TextUnmarshaler)).Elem() -// object consumes an object from d.data[d.off-1:], decoding into the value v. -// the first byte ('{') of the object has been read already. +// object consumes an object from d.data[d.off-1:], decoding into v. +// The first byte ('{') of the object has been read already. func (d *decodeState) object(v reflect.Value) { // Check for unmarshaler. - u, ut, pv := d.indirect(v, false) + u, ut, pv := indirect(v, false) if u != nil { - d.off-- - err := u.UnmarshalJSON(d.next()) + start := d.readIndex() + d.skip() + err := u.UnmarshalJSON(d.data[start:d.off]) if err != nil { d.error(err) } @@ -636,8 +630,7 @@ func (d *decodeState) object(v reflect.Value) { } if ut != nil { d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) - d.off-- - d.next() // skip over { } in input + d.skip() return } v = pv @@ -664,8 +657,7 @@ func (d *decodeState) object(v reflect.Value) { default: if !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) { d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) - d.off-- - d.next() // skip over { } in input + d.skip() return } } @@ -676,8 +668,7 @@ func (d *decodeState) object(v reflect.Value) { // ok default: d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) - d.off-- - d.next() // skip over { } in input + d.skip() return } @@ -685,19 +676,19 @@ func (d *decodeState) object(v reflect.Value) { for { // Read opening " of string key or closing }. - op := d.scanWhile(scanSkipSpace) - if op == scanEndObject { + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { // closing } - can only happen on first iteration. break } - if op != scanBeginLiteral { + if d.opcode != scanBeginLiteral { d.error(errPhase) } // Read key. - start := d.off - 1 - op = d.scanWhile(scanContinue) - item := d.data[start : d.off-1] + start := d.readIndex() + d.scanWhile(scanContinue) + item := d.data[start:d.readIndex()] key, ok := unquoteBytes(item) if !ok { d.error(errPhase) @@ -761,12 +752,13 @@ func (d *decodeState) object(v reflect.Value) { } // Read : before value. - if op == scanSkipSpace { - op = d.scanWhile(scanSkipSpace) + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) } - if op != scanObjectKey { + if d.opcode != scanObjectKey { d.error(errPhase) } + d.scanWhile(scanSkipSpace) if destring { switch qv := d.valueQuoted().(type) { @@ -819,11 +811,13 @@ func (d *decodeState) object(v reflect.Value) { } // Next token must be , or }. - op = d.scanWhile(scanSkipSpace) - if op == scanEndObject { + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndObject { break } - if op != scanObjectValue { + if d.opcode != scanObjectValue { d.error(errPhase) } @@ -832,21 +826,6 @@ func (d *decodeState) object(v reflect.Value) { } } -// literal consumes a literal from d.data[d.off-1:], decoding into the value v. -// The first byte of the literal has been read already -// (that's how the caller knows it's a literal). -func (d *decodeState) literal(v reflect.Value) { - // All bytes inside literal return scanContinue op code. - start := d.off - 1 - op := d.scanWhile(scanContinue) - - // Scan read one byte too far; back up. - d.off-- - d.scan.undo(op) - - d.literalStore(d.data[start:d.off], v, false) -} - // convertNumber converts the number literal s to a float64 or a Number // depending on the setting of d.useNumber. func (d *decodeState) convertNumber(s string) (interface{}, error) { @@ -875,7 +854,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool return } isNull := item[0] == 'n' // null - u, ut, pv := d.indirect(v, isNull) + u, ut, pv := indirect(v, isNull) if u != nil { err := u.UnmarshalJSON(item) if err != nil { @@ -897,7 +876,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool default: val = "number" } - d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) } return } @@ -944,7 +923,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if fromQuoted { d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) } else { - d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) } case reflect.Bool: v.SetBool(value) @@ -952,7 +931,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if v.NumMethod() == 0 { v.Set(reflect.ValueOf(value)) } else { - d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) } } @@ -967,10 +946,10 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } switch v.Kind() { default: - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) case reflect.Slice: if v.Type().Elem().Kind() != reflect.Uint8 { - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) break } b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) @@ -986,7 +965,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if v.NumMethod() == 0 { v.Set(reflect.ValueOf(string(s))) } else { - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) } } @@ -1011,7 +990,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool if fromQuoted { d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) } else { - d.error(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)}) + d.error(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) } case reflect.Interface: n, err := d.convertNumber(s) @@ -1020,7 +999,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool break } if v.NumMethod() != 0 { - d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) break } v.Set(reflect.ValueOf(n)) @@ -1028,7 +1007,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: n, err := strconv.ParseInt(s, 10, 64) if err != nil || v.OverflowInt(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())}) break } v.SetInt(n) @@ -1036,7 +1015,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: n, err := strconv.ParseUint(s, 10, 64) if err != nil || v.OverflowUint(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())}) break } v.SetUint(n) @@ -1044,7 +1023,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool case reflect.Float32, reflect.Float64: n, err := strconv.ParseFloat(s, v.Type().Bits()) if err != nil || v.OverflowFloat(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())}) break } v.SetFloat(n) @@ -1057,18 +1036,21 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool // but they avoid the weight of reflection in this common case. // valueInterface is like value but returns interface{} -func (d *decodeState) valueInterface() interface{} { - switch d.scanWhile(scanSkipSpace) { +func (d *decodeState) valueInterface() (val interface{}) { + switch d.opcode { default: d.error(errPhase) panic("unreachable") case scanBeginArray: - return d.arrayInterface() + val = d.arrayInterface() + d.scanNext() case scanBeginObject: - return d.objectInterface() + val = d.objectInterface() + d.scanNext() case scanBeginLiteral: - return d.literalInterface() + val = d.literalInterface() } + return } // arrayInterface is like array but returns []interface{}. @@ -1076,23 +1058,21 @@ func (d *decodeState) arrayInterface() []interface{} { var v = make([]interface{}, 0) for { // Look ahead for ] - can only happen on first iteration. - op := d.scanWhile(scanSkipSpace) - if op == scanEndArray { + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndArray { break } - // Back up so d.value can have the byte we just read. - d.off-- - d.scan.undo(op) - v = append(v, d.valueInterface()) // Next token must be , or ]. - op = d.scanWhile(scanSkipSpace) - if op == scanEndArray { + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndArray { break } - if op != scanArrayValue { + if d.opcode != scanArrayValue { d.error(errPhase) } } @@ -1104,57 +1084,59 @@ func (d *decodeState) objectInterface() map[string]interface{} { m := make(map[string]interface{}) for { // Read opening " of string key or closing }. - op := d.scanWhile(scanSkipSpace) - if op == scanEndObject { + d.scanWhile(scanSkipSpace) + if d.opcode == scanEndObject { // closing } - can only happen on first iteration. break } - if op != scanBeginLiteral { + if d.opcode != scanBeginLiteral { d.error(errPhase) } // Read string key. - start := d.off - 1 - op = d.scanWhile(scanContinue) - item := d.data[start : d.off-1] + start := d.readIndex() + d.scanWhile(scanContinue) + item := d.data[start:d.readIndex()] key, ok := unquote(item) if !ok { d.error(errPhase) } // Read : before value. - if op == scanSkipSpace { - op = d.scanWhile(scanSkipSpace) + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) } - if op != scanObjectKey { + if d.opcode != scanObjectKey { d.error(errPhase) } + d.scanWhile(scanSkipSpace) // Read value. m[key] = d.valueInterface() // Next token must be , or }. - op = d.scanWhile(scanSkipSpace) - if op == scanEndObject { + if d.opcode == scanSkipSpace { + d.scanWhile(scanSkipSpace) + } + if d.opcode == scanEndObject { break } - if op != scanObjectValue { + if d.opcode != scanObjectValue { d.error(errPhase) } } return m } -// literalInterface is like literal but returns an interface value. +// literalInterface consumes and returns a literal from d.data[d.off-1:] and +// it reads the following byte ahead. The first byte of the literal has been +// read already (that's how the caller knows it's a literal). func (d *decodeState) literalInterface() interface{} { // All bytes inside literal return scanContinue op code. - start := d.off - 1 - op := d.scanWhile(scanContinue) + start := d.readIndex() + d.scanWhile(scanContinue) - // Scan read one byte too far; back up. - d.off-- - d.scan.undo(op) - item := d.data[start:d.off] + item := d.data[start:d.readIndex()] switch c := item[0]; c { case 'n': // null diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go index ae34418d1d..9e6d482e16 100644 --- a/src/encoding/json/scanner.go +++ b/src/encoding/json/scanner.go @@ -8,7 +8,7 @@ package json // Just about at the limit of what is reasonable to write by hand. // Some parts are a bit tedious, but overall it nicely factors out the // otherwise common code from the multiple scanning functions -// in this package (Compact, Indent, checkValid, nextValue, etc). +// in this package (Compact, Indent, checkValid, etc). // // This file starts with two simple examples using the scanner // before diving into the scanner itself. @@ -36,35 +36,6 @@ func checkValid(data []byte, scan *scanner) error { return nil } -// nextValue splits data after the next whole JSON value, -// returning that value and the bytes that follow it as separate slices. -// scan is passed in for use by nextValue to avoid an allocation. -func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) { - scan.reset() - for i, c := range data { - v := scan.step(scan, c) - if v >= scanEndObject { - switch v { - // probe the scanner with a space to determine whether we will - // get scanEnd on the next character. Otherwise, if the next character - // is not a space, scanEndTop allocates a needless error. - case scanEndObject, scanEndArray: - if scan.step(scan, ' ') == scanEnd { - return data[:i+1], data[i+1:], nil - } - case scanError: - return nil, nil, scan.err - case scanEnd: - return data[:i], data[i:], nil - } - } - } - if scan.eof() == scanError { - return nil, nil, scan.err - } - return data, nil, nil -} - // A SyntaxError is a description of a JSON syntax error. type SyntaxError struct { msg string // description of error @@ -101,11 +72,6 @@ type scanner struct { // Error that happened, if any. err error - // 1-byte redo (see undo method) - redo bool - redoCode int - redoState func(*scanner, byte) int - // total bytes consumed, updated by decoder.Decode bytes int64 } @@ -151,7 +117,6 @@ func (s *scanner) reset() { s.step = stateBeginValue s.parseState = s.parseState[0:0] s.err = nil - s.redo = false s.endTop = false } @@ -184,7 +149,6 @@ func (s *scanner) pushParseState(p int) { func (s *scanner) popParseState() { n := len(s.parseState) - 1 s.parseState = s.parseState[0:n] - s.redo = false if n == 0 { s.step = stateEndTop s.endTop = true @@ -607,22 +571,3 @@ func quoteChar(c byte) string { s := strconv.Quote(string(c)) return "'" + s[1:len(s)-1] + "'" } - -// undo causes the scanner to return scanCode from the next state transition. -// This gives callers a simple 1-byte undo mechanism. -func (s *scanner) undo(scanCode int) { - if s.redo { - panic("json: invalid use of scanner") - } - s.redoCode = scanCode - s.redoState = s.step - s.step = stateRedo - s.redo = true -} - -// stateRedo helps implement the scanner's 1-byte undo. -func stateRedo(s *scanner, c byte) int { - s.redo = false - s.step = s.redoState - return s.redoCode -} diff --git a/src/encoding/json/scanner_test.go b/src/encoding/json/scanner_test.go index 0d4518a632..6cdbe7d301 100644 --- a/src/encoding/json/scanner_test.go +++ b/src/encoding/json/scanner_test.go @@ -200,43 +200,6 @@ func TestIndentErrors(t *testing.T) { } } -func TestNextValueBig(t *testing.T) { - initBig() - var scan scanner - item, rest, err := nextValue(jsonBig, &scan) - if err != nil { - t.Fatalf("nextValue: %s", err) - } - if len(item) != len(jsonBig) || &item[0] != &jsonBig[0] { - t.Errorf("invalid item: %d %d", len(item), len(jsonBig)) - } - if len(rest) != 0 { - t.Errorf("invalid rest: %d", len(rest)) - } - - item, rest, err = nextValue(append(jsonBig, "HELLO WORLD"...), &scan) - if err != nil { - t.Fatalf("nextValue extra: %s", err) - } - if len(item) != len(jsonBig) { - t.Errorf("invalid item: %d %d", len(item), len(jsonBig)) - } - if string(rest) != "HELLO WORLD" { - t.Errorf("invalid rest: %d", len(rest)) - } -} - -var benchScan scanner - -func BenchmarkSkipValue(b *testing.B) { - initBig() - b.ResetTimer() - for i := 0; i < b.N; i++ { - nextValue(jsonBig, &benchScan) - } - b.SetBytes(int64(len(jsonBig))) -} - func diff(t *testing.T, a, b []byte) { for i := 0; ; i++ { if i >= len(a) || i >= len(b) || a[i] != b[i] { -- 2.48.1