// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
- // GetRune reads the next rune (Unicode code point) from the input.
- // If invoked during Scanln, Fscanln, or Sscanln, GetRune() will
- // return EOF after returning the first '\n'.
- GetRune() (rune int, err os.Error)
- // UngetRune causes the next call to GetRune to return the same rune.
- UngetRune()
- // Width returns the value of the width option and whether it has been set.
- // The unit is Unicode code points.
- Width() (wid int, ok bool)
+ // ReadRune reads the next rune (Unicode code point) from the input.
+ // If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
+ // return EOF after returning the first '\n' or when reading beyond
+ // the specified width.
+ ReadRune() (rune int, size int, err os.Error)
+ // UnreadRune causes the next call to ReadRune to return the same rune.
+ UnreadRune() os.Error
// Token returns the next space-delimited token from the input. If
// a width has been specified, the returned token will be no longer
// than the width.
Token() (token string, err os.Error)
+ // Width returns the value of the width option and whether it has been set.
+ // The unit is Unicode code points.
+ Width() (wid int, ok bool)
+ // Because ReadRune is implemented by the interface, Read should never be
+ // called by the scanning routines and a valid implementation of
+ // ScanState may choose always to return an error from Read.
+ Read(buf []byte) (n int, err os.Error)
}
// Scanner is implemented by any value that has a Scan method, which scans
// ss is the internal implementation of ScanState.
type ss struct {
- rr io.RuneReader // where to read input
- buf bytes.Buffer // token accumulator
- nlIsSpace bool // whether newline counts as white space
- nlIsEnd bool // whether newline terminates scan
- peekRune int // one-rune lookahead
- prevRune int // last rune returned by GetRune
- atEOF bool // already read EOF
- maxWid int // max width of field, in runes
- widPresent bool // width was specified
- wid int // width consumed so far; used in accept()
+ rr io.RuneReader // where to read input
+ buf bytes.Buffer // token accumulator
+ nlIsSpace bool // whether newline counts as white space
+ nlIsEnd bool // whether newline terminates scan
+ peekRune int // one-rune lookahead
+ prevRune int // last rune returned by ReadRune
+ atEOF bool // already read EOF
+ maxWid int // max width of field, in runes
+ wid int // width consumed so far; used in accept()
}
-func (s *ss) GetRune() (rune int, err os.Error) {
+// The Read method is only in ScanState so that ScanState
+// satisfies io.Reader. It will never be called when used as
+// intended, so there is no need to make it actually work.
+func (s *ss) Read(buf []byte) (n int, err os.Error) {
+ return 0, os.ErrorString("ScanState's Read should not be called. Use ReadRune")
+}
+
+func (s *ss) ReadRune() (rune int, size int, err os.Error) {
if s.peekRune >= 0 {
+ s.wid++
rune = s.peekRune
+ size = utf8.RuneLen(rune)
s.prevRune = rune
s.peekRune = -1
return
}
- if s.nlIsEnd && s.prevRune == '\n' {
- rune = EOF
+ if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.wid >= s.maxWid {
err = os.EOF
return
}
- rune, _, err = s.rr.ReadRune()
+
+ rune, size, err = s.rr.ReadRune()
if err == nil {
+ s.wid++
s.prevRune = rune
+ } else if err == os.EOF {
+ s.atEOF = true
}
return
}
func (s *ss) Width() (wid int, ok bool) {
- return s.maxWid, s.widPresent
+ if s.maxWid == hugeWid {
+ return 0, false
+ }
+ return s.maxWid, true
}
// The public method returns an error; this private one panics.
// If getRune reaches EOF, the return value is EOF (-1).
func (s *ss) getRune() (rune int) {
- if s.atEOF {
- return EOF
- }
- if s.peekRune >= 0 {
- rune = s.peekRune
- s.prevRune = rune
- s.peekRune = -1
- return
- }
- rune, _, err := s.rr.ReadRune()
- if err == nil {
- s.prevRune = rune
- } else if err != nil {
+ rune, _, err := s.ReadRune()
+ if err != nil {
if err == os.EOF {
- s.atEOF = true
return EOF
}
s.error(err)
return
}
-// mustGetRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
+// mustReadRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
// It is called in cases such as string scanning where an EOF is a
// syntax error.
-func (s *ss) mustGetRune() (rune int) {
- if s.atEOF {
+func (s *ss) mustReadRune() (rune int) {
+ rune = s.getRune()
+ if rune == EOF {
s.error(io.ErrUnexpectedEOF)
}
- if s.peekRune >= 0 {
- rune = s.peekRune
- s.peekRune = -1
- return
- }
- rune, _, err := s.rr.ReadRune()
- if err != nil {
- if err == os.EOF {
- err = io.ErrUnexpectedEOF
- }
- s.error(err)
- }
return
}
-
-func (s *ss) UngetRune() {
+func (s *ss) UnreadRune() os.Error {
if u, ok := s.rr.(runeUnreader); ok {
u.UnreadRune()
} else {
s.peekRune = s.prevRune
}
+ s.wid--
+ return nil
}
func (s *ss) error(err os.Error) {
s.prevRune = -1
s.peekRune = -1
s.atEOF = false
- s.maxWid = 0
- s.widPresent = false
+ s.maxWid = hugeWid
return s
}
return
}
if !unicode.IsSpace(rune) {
- s.UngetRune()
+ s.UnreadRune()
break
}
}
func (s *ss) token() string {
s.skipSpace(false)
// read until white space or newline
- for nrunes := 0; !s.widPresent || nrunes < s.maxWid; nrunes++ {
+ for {
rune := s.getRune()
if rune == EOF {
break
}
if unicode.IsSpace(rune) {
- s.UngetRune()
+ s.UnreadRune()
break
}
s.buf.WriteRune(rune)
// consume reads the next rune in the input and reports whether it is in the ok string.
// If accept is true, it puts the character into the input token.
func (s *ss) consume(ok string, accept bool) bool {
- if s.wid >= s.maxWid {
- return false
- }
rune := s.getRune()
if rune == EOF {
return false
if strings.IndexRune(ok, rune) >= 0 {
if accept {
s.buf.WriteRune(rune)
- s.wid++
}
return true
}
if rune != EOF && accept {
- s.UngetRune()
+ s.UnreadRune()
}
return false
}
func (s *ss) peek(ok string) bool {
rune := s.getRune()
if rune != EOF {
- s.UngetRune()
+ s.UnreadRune()
}
return strings.IndexRune(ok, rune) >= 0
}
return false
}
// Syntax-checking a boolean is annoying. We're not fastidious about case.
- switch s.mustGetRune() {
+ switch s.mustReadRune() {
case '0':
return false
case '1':
// scanRune returns the next rune value in the input.
func (s *ss) scanRune(bitSize int) int64 {
- rune := int64(s.mustGetRune())
+ rune := int64(s.mustReadRune())
n := uint(bitSize)
x := (rune << (64 - n)) >> (64 - n)
if x != rune {
// quotedString returns the double- or back-quoted string represented by the next input characters.
func (s *ss) quotedString() string {
- quote := s.mustGetRune()
+ quote := s.mustReadRune()
switch quote {
case '`':
// Back-quoted: Anything goes until EOF or back quote.
for {
- rune := s.mustGetRune()
+ rune := s.mustReadRune()
if rune == quote {
break
}
// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
s.buf.WriteRune(quote)
for {
- rune := s.mustGetRune()
+ rune := s.mustReadRune()
s.buf.WriteRune(rune)
if rune == '\\' {
// In a legal backslash escape, no matter how long, only the character
// immediately after the escape can itself be a backslash or quote.
// Thus we only need to protect the first character after the backslash.
- rune := s.mustGetRune()
+ rune := s.mustReadRune()
s.buf.WriteRune(rune)
} else if rune == '"' {
break
return
}
if unicode.IsSpace(rune1) {
- s.UngetRune()
+ s.UnreadRune()
return
}
- rune2 := s.mustGetRune()
+ rune2 := s.mustReadRune()
return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
}
const floatVerbs = "beEfFgGv"
+const hugeWid = 1 << 30
+
// scanOne scans a single value, deriving the scanner from the type of the argument.
func (s *ss) scanOne(verb int, field interface{}) {
s.buf.Reset()
}
return
}
- if !s.widPresent {
- s.maxWid = 1 << 30 // Huge
- }
- s.wid = 0
switch v := field.(type) {
case *bool:
*v = s.scanBool(verb)
}
// doScan does the real work for scanning without a format string.
-// At the moment, it handles only pointers to basic types.
func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) {
defer errorHandler(&err)
for _, field := range a {
s.skipSpace(true)
continue
}
- inputc := s.mustGetRune()
+ inputc := s.mustReadRune()
if fmtc != inputc {
- s.UngetRune()
+ s.UnreadRune()
return -1
}
i += w
i++ // % is one byte
// do we have 20 (width)?
- s.maxWid, s.widPresent, i = parsenum(format, i, end)
+ var widPresent bool
+ s.maxWid, widPresent, i = parsenum(format, i, end)
+ if !widPresent {
+ s.maxWid = hugeWid
+ }
+ s.wid = 0
c, w := utf8.DecodeRuneInString(format[i:])
i += w
s.scanOne(c, field)
numProcessed++
+ s.maxWid = hugeWid
}
if numProcessed < len(a) {
s.errorString("too many operands")
type Xs string
func (x *Xs) Scan(state ScanState, verb int) os.Error {
- var tok string
- var c int
- var err os.Error
- wid, present := state.Width()
- if !present {
- tok, err = state.Token()
- } else {
- for i := 0; i < wid; i++ {
- c, err = state.GetRune()
- if err != nil {
- break
- }
- tok += string(c)
- }
- }
+ tok, err := state.Token()
if err != nil {
return err
}
var xVal Xs
+// IntString accepts an integer followed immediately by a string.
+// It tests the embedding of a scan within a scan.
+type IntString struct {
+ i int
+ s string
+}
+
+func (s *IntString) Scan(state ScanState, verb int) os.Error {
+ if _, err := Fscan(state, &s.i); err != nil {
+ return err
+ }
+
+ if _, err := Fscan(state, &s.s); err != nil {
+ return err
+ }
+ return nil
+}
+
+var intStringVal IntString
+
// myStringReader implements Read but not ReadRune, allowing us to test our readRune wrapper
// type that creates something that can read runes given only Read().
type myStringReader struct {
{"114\n", &renamedStringVal, renamedString("114")},
{"115\n", &renamedBytesVal, renamedBytes([]byte("115"))},
- // Custom scanner.
+ // Custom scanners.
{" vvv ", &xVal, Xs("vvv")},
+ {" 1234hello", &intStringVal, IntString{1234, "hello"}},
// Fixed bugs
{"2147483648\n", &int64Val, int64(2147483648)}, // was: integer overflow
var s, t string
var c complex128
var x, y Xs
+var z IntString
var multiTests = []ScanfMultiTest{
{"", "", nil, nil, ""},
{"%d%s", "123abc", args(&i, &s), args(123, "abc"), ""},
{"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""},
- // Custom scanner.
+ // Custom scanners.
{"%2e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""},
+ {"%4v%s", "12abcd", args(&z, &s), args(IntString{12, "ab"}, "cd"), ""},
// Errors
{"%t", "23 18", args(&i), nil, "bad verb"},
}
n, err := scan(r, test.in)
if err != nil {
- t.Errorf("%s got error scanning %q: %s", name, test.text, err)
+ m := ""
+ if n > 0 {
+ m = Sprintf(" (%d fields ok)", n)
+ }
+ t.Errorf("%s got error scanning %q: %s%s", name, test.text, err, m)
continue
}
if n != 1 {
func (t *TwoLines) Scan(state ScanState, verb int) os.Error {
chars := make([]int, 0, 100)
for nlCount := 0; nlCount < 2; {
- c, err := state.GetRune()
+ c, _, err := state.ReadRune()
if err != nil {
return err
}