Scanln, Fscanln and Sscanln stop scanning at a newline and
require that the items be followed by a newline or EOF.
- Scanf, Fscanf and Sscanf require that (after skipping spaces)
- newlines in the format are matched by newlines in the input
- and vice versa. This behavior differs from the corresponding
- routines in C, which uniformly treat newlines as spaces.
-
- When scanning with Scanf, Fscanf, and Sscanf, all non-empty
- runs of space characters (except newline) are equivalent
- to a single space in both the format and the input. With
- that proviso, text in the format string must match the input
- text; scanning stops if it does not, with the return value
- of the function indicating the number of arguments scanned.
-
Scanf, Fscanf, and Sscanf parse the arguments according to a
- format string, analogous to that of Printf. For example, %x
- will scan an integer as a hexadecimal number, and %v will scan
- the default representation format for the value.
-
- The formats behave analogously to those of Printf with the
- following exceptions:
-
- %p is not implemented
- %T is not implemented
- %e %E %f %F %g %G are all equivalent and scan any floating point or complex value
- %s and %v on strings scan a space-delimited token
- Flags # and + are not implemented.
+ format string, analogous to that of Printf. In the text that
+ follows, 'space' means any Unicode whitespace character
+ except newline.
+
+ In the format string, a verb introduced by the % character
+ consumes and parses input; these verbs are described in more
+ detail below. A character other than %, space, or newline in
+ the format consumes exactly that input character, which must
+ be present. A newline with zero or more spaces before it in
+ the format string consumes zero or more spaces in the input
+ followed by a single newline or the end of the input. A space
+ following a newline in the format string consumes zero or more
+ spaces in the input. Otherwise, any run of one or more spaces
+ in the format string consumes as many spaces as possible in
+ the input. Unless the run of spaces in the format string
+ appears adjacent to a newline, the run must consume at least
+ one space from the input or find the end of the input.
+
+ The handling of spaces and newlines differs from that of C's
+ scanf family: in C, newlines are treated as any other space,
+ and it is never an error when a run of spaces in the format
+ string finds no spaces to consume in the input.
+
+ The verbs behave analogously to those of Printf.
+ For example, %x will scan an integer as a hexadecimal number,
+ and %v will scan the default representation format for the value.
+ The Printf verbs %p and %T and the flags # and + are not implemented,
+ and the verbs %e %E %f %F %g and %G are all equivalent and scan any
+ floating-point or complex value.
+
+ Input processed by verbs is implicitly space-delimited: the
+ implementation of every verb except %c starts by discarding
+ leading spaces from the remaining input, and the %s verb
+ (and %v reading into a string) stops consuming input at the first
+ space or newline character.
The familiar base-setting prefixes 0 (octal) and 0x
(hexadecimal) are accepted when scanning integers without
All arguments to be scanned must be either pointers to basic
types or implementations of the Scanner interface.
+ Like Scanf and Fscanf, Sscanf need not consume its entire input.
+ There is no way to recover how much of the input string Sscanf used.
+
Note: Fscan etc. can read one character (rune) past the input
they return, which means that a loop calling a scan routine
may skip some of the input. This is usually a problem only
func (s *ss) advance(format string) (i int) {
for i < len(format) {
fmtc, w := utf8.DecodeRuneInString(format[i:])
+
+ // Space processing.
+ // In the rest of this comment "space" means spaces other than newline.
+ // Newline in the format matches input of zero or more spaces and then newline or end-of-input.
+ // Spaces in the format before the newline are collapsed into the newline.
+ // Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
+ // Other spaces in the format match input of one or more spaces or end-of-input.
+ if isSpace(fmtc) {
+ newlines := 0
+ trailingSpace := false
+ for isSpace(fmtc) && i < len(format) {
+ if fmtc == '\n' {
+ newlines++
+ trailingSpace = false
+ } else {
+ trailingSpace = true
+ }
+ i += w
+ fmtc, w = utf8.DecodeRuneInString(format[i:])
+ }
+ for j := 0; j < newlines; j++ {
+ inputc := s.getRune()
+ for isSpace(inputc) && inputc != '\n' {
+ inputc = s.getRune()
+ }
+ if inputc != '\n' && inputc != eof {
+ s.errorString("newline in format does not match input")
+ }
+ }
+ if trailingSpace {
+ inputc := s.getRune()
+ if newlines == 0 {
+ // If the trailing space stood alone (did not follow a newline),
+ // it must find at least one space to consume.
+ if !isSpace(inputc) && inputc != eof {
+ s.errorString("expected space in input to match format")
+ }
+ if inputc == '\n' {
+ s.errorString("newline in input does not match format")
+ }
+ }
+ for isSpace(inputc) && inputc != '\n' {
+ inputc = s.getRune()
+ }
+ if inputc != eof {
+ s.UnreadRune()
+ }
+ }
+ continue
+ }
+
+ // Verbs.
if fmtc == '%' {
// % at end of string is an error.
if i+w == len(format) {
}
i += w // skip the first %
}
- sawSpace := false
- wasNewline := false
- // Skip spaces in format but absorb at most one newline.
- for isSpace(fmtc) && i < len(format) {
- if fmtc == '\n' {
- if wasNewline { // Already saw one; stop here.
- break
- }
- wasNewline = true
- }
- sawSpace = true
- i += w
- fmtc, w = utf8.DecodeRuneInString(format[i:])
- }
- if sawSpace {
- // There was space in the format, so there should be space
- // in the input.
- inputc := s.getRune()
- if inputc == eof {
- return
- }
- if !isSpace(inputc) {
- // Space in format but not in input.
- s.errorString("expected space in input to match format")
- }
- // Skip spaces but stop at newline.
- for inputc != '\n' && isSpace(inputc) {
- inputc = s.getRune()
- }
- if inputc == '\n' {
- if !wasNewline {
- s.errorString("newline in input does not match format")
- }
- // We've reached a newline, stop now; don't read further.
- return
- }
- s.UnreadRune()
- if wasNewline {
- s.errorString("newline in format does not match input")
- }
- continue
- }
+
+ // Literals.
inputc := s.mustReadRune()
if fmtc != inputc {
s.UnreadRune()
{"X%dX", " X27X ", &intVal, nil}, // input does not match format
{"X%dX\n", "X27X", &intVal, 27},
- {"X%dX\n", "X27X ", &intVal, nil}, // newline in format does not match input
+ {"X%dX \n", "X27X ", &intVal, 27},
{"X%dX\n", "X27X\n", &intVal, 27},
{"X%dX\n", "X27X \n", &intVal, 27},
{"X%dX \n", "X27X", &intVal, 27},
- {"X%dX \n", "X27X ", &intVal, nil}, // newline in format does not match input
+ {"X%dX \n", "X27X ", &intVal, 27},
{"X%dX \n", "X27X\n", &intVal, 27},
{"X%dX \n", "X27X \n", &intVal, 27},
{"X %c", "X!", &runeVal, nil}, // expected space in input to match format
{"X %c", "X\n", &runeVal, nil}, // newline in input does not match format
{"X %c", "X !", &runeVal, '!'},
- {"X %c", "X \n", &runeVal, nil}, // newline in input does not match format
+ {"X %c", "X \n", &runeVal, '\n'},
{" X%dX", "X27X", &intVal, nil}, // expected space in input to match format
{" X%dX", "X27X ", &intVal, nil}, // expected space in input to match format
{" X%dX ", " X27X ", &intVal, 27},
{"%d\nX", "27\nX", &intVal, 27},
- {"%dX\n X", "27X\n X", &intVal, nil}, // input does not match format
+ {"%dX\n X", "27X\n X", &intVal, 27},
}
var overflowTests = []ScanTest{
{"space vs newline no-percent 0001", "1\n2", "1\n 2", 0, true},
{"space vs newline no-percent 0010", "1\n2", "1 \n2", 0, true},
{"space vs newline no-percent 0011", "1\n2", "1 \n 2", 0, true},
- {"space vs newline no-percent 0100", "1\n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern
- {"space vs newline no-percent 0101", "1\n 2", "1\n2 ", 0, false}, // fails: space after nl in input but not pattern
- {"space vs newline no-percent 0110", "1\n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern
- {"space vs newline no-percent 0111", "1\n 2", "1 \n 2", 0, false}, // fails: hard to explain
+ {"space vs newline no-percent 0100", "1\n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern
+ {"space vs newline no-percent 0101", "1\n 2", "1\n2 ", 0, false}, // fails: space after nl in input but not pattern
+ {"space vs newline no-percent 0110", "1\n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern
+ {"space vs newline no-percent 0111", "1\n 2", "1 \n 2", 0, true},
{"space vs newline no-percent 1000", "1 \n2", "1\n2", 0, true},
{"space vs newline no-percent 1001", "1 \n2", "1\n 2", 0, true},
{"space vs newline no-percent 1010", "1 \n2", "1 \n2", 0, true},
{"space vs newline no-percent 1011", "1 \n2", "1 \n 2", 0, true},
- {"space vs newline no-percent 1100", "1 \n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern
- {"space vs newline no-percent 1101", "1 \n 2", "1\n 2", 0, false}, // fails: hard to explain
- {"space vs newline no-percent 1110", "1 \n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern
- {"space vs newline no-percent 1111", "1 \n 2", "1 \n 2", 0, false}, // fails: hard to explain
+ {"space vs newline no-percent 1100", "1 \n 2", "1\n2", 0, false}, // fails: space after nl in input but not pattern
+ {"space vs newline no-percent 1101", "1 \n 2", "1\n 2", 0, true},
+ {"space vs newline no-percent 1110", "1 \n 2", "1 \n2", 0, false}, // fails: space after nl in input but not pattern
+ {"space vs newline no-percent 1111", "1 \n 2", "1 \n 2", 0, true},
}
for _, test := range tests {
var n int