// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement ReadRuner.
type readRune struct {
- reader io.Reader
- buf [utf8.UTFMax]byte
+ reader io.Reader
+ buf [utf8.UTFMax]byte // used only inside ReadRune
+ pending int // number of bytes in pendBuf; only >0 for bad UTF-8
+ pendBuf [utf8.UTFMax]byte // bytes left over
+}
+
+// readByte returns the next byte from the input, which may be
+// left over from a previous read if the UTF-8 was ill-formed.
+func (r *readRune) readByte() (b byte, err os.Error) {
+ if r.pending > 0 {
+ b = r.pendBuf[0]
+ copy(r.pendBuf[0:], r.pendBuf[1:])
+ r.pending--
+ return
+ }
+ _, err = r.reader.Read(r.pendBuf[0:1])
+ return r.pendBuf[0], err
+}
+
+// unread saves the bytes for the next read.
+func (r *readRune) unread(buf []byte) {
+ copy(r.pendBuf[r.pending:], buf)
+ r.pending += len(buf)
}
// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r.
-func (r readRune) ReadRune() (rune int, size int, err os.Error) {
- _, err = r.reader.Read(r.buf[0:1])
+func (r *readRune) ReadRune() (rune int, size int, err os.Error) {
+ r.buf[0], err = r.readByte()
if err != nil {
return 0, 0, err
}
rune = int(r.buf[0])
return
}
- for size := 1; size < utf8.UTFMax; size++ {
- _, err = r.reader.Read(r.buf[size : size+1])
+ var n int
+ for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
+ r.buf[n], err = r.readByte()
if err != nil {
- break
- }
- if !utf8.FullRune(r.buf[0:]) {
- continue
- }
- if c, w := utf8.DecodeRune(r.buf[0:size]); w == size {
- rune = c
+ if err == os.EOF {
+ err = nil
+ break
+ }
return
}
}
- return utf8.RuneError, 1, err
+ rune, size = utf8.DecodeRune(r.buf[0:n])
+ if size < n { // an error
+ r.unread(r.buf[size:n])
+ }
+ return
}
if rr, ok := r.(readRuner); ok {
s.rr = rr
} else {
- s.rr = readRune{reader: r}
+ s.rr = &readRune{reader: r}
}
s.nlIsSpace = nlIsSpace
s.peekRune = -1
"reflect"
"strings"
"testing"
+ "utf8"
)
type ScanTest struct {
var xVal Xs
+// myStringReader implements Read but not ReadRune, allowing us to test our readRune wrapper
+// type that creates something that can read runes given only Read().
+type myStringReader struct {
+ r *strings.Reader
+}
+
+func (s *myStringReader) Read(p []byte) (n int, err os.Error) {
+ return s.r.Read(p)
+}
+
+func newReader(s string) *myStringReader {
+ return &myStringReader{strings.NewReader(s)}
+}
+
var scanTests = []ScanTest{
// Numbers
ScanTest{"T\n", &boolVal, true}, // boolean test vals toggle to be sure they are written
ScanfTest{"%v", "-71\n", &intVal, -71},
ScanfTest{"%d", "72\n", &intVal, 72},
ScanfTest{"%c", "a\n", &intVal, 'a'},
+ ScanfTest{"%c", "\u5072\n", &intVal, 0x5072},
ScanfTest{"%c", "\u1234\n", &intVal, '\u1234'},
ScanfTest{"%d", "73\n", &int8Val, int8(73)},
ScanfTest{"%d", "+74\n", &int16Val, int16(74)},
ScanfTest{"%v\n", "true\n", &renamedBoolVal, renamedBool(true)},
ScanfTest{"%t\n", "F\n", &renamedBoolVal, renamedBool(false)},
ScanfTest{"%v", "101\n", &renamedIntVal, renamedInt(101)},
+ ScanfTest{"%c", "\u0101\n", &renamedIntVal, renamedInt('\u0101')},
ScanfTest{"%o", "0146\n", &renamedIntVal, renamedInt(102)},
ScanfTest{"%v", "103\n", &renamedUintVal, renamedUint(103)},
ScanfTest{"%d", "104\n", &renamedUintVal, renamedUint(104)},
ScanfMultiTest{"%3d22%3d", "33322333", args(&i, &j), args(333, 333), ""},
ScanfMultiTest{"%6vX=%3fY", "3+2iX=2.5Y", args(&c, &f), args((3 + 2i), float(2.5)), ""},
ScanfMultiTest{"%d%s", "123abc", args(&i, &s), args(123, "abc"), ""},
+ ScanfMultiTest{"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""},
// Custom scanner.
ScanfMultiTest{"%2e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""},
ScanfMultiTest{"%d %d %d", "23 18", args(&i, &j), args(23, 18), "too few operands"},
ScanfMultiTest{"%d %d", "23 18 27", args(&i, &j, &k), args(23, 18), "too many operands"},
ScanfMultiTest{"%c", "\u0100", args(&int8Val), nil, "overflow"},
+
+ // Bad UTF-8: should see every byte.
+ ScanfMultiTest{"%c%c%c", "\xc2X\xc2", args(&i, &j, &k), args(utf8.RuneError, 'X', utf8.RuneError), ""},
}
-func testScan(t *testing.T, scan func(r io.Reader, a ...interface{}) (int, os.Error)) {
+func testScan(name string, t *testing.T, scan func(r io.Reader, a ...interface{}) (int, os.Error)) {
for _, test := range scanTests {
- r := strings.NewReader(test.text)
+ var r io.Reader
+ if name == "StringReader" {
+ r = strings.NewReader(test.text)
+ } else {
+ r = newReader(test.text)
+ }
n, err := scan(r, test.in)
if err != nil {
- t.Errorf("got error scanning %q: %s", test.text, err)
+ t.Errorf("%s got error scanning %q: %s", name, test.text, err)
continue
}
if n != 1 {
- t.Errorf("count error on entry %q: got %d", test.text, n)
+ t.Errorf("%s count error on entry %q: got %d", name, test.text, n)
continue
}
// The incoming value may be a pointer
}
val := v.Interface()
if !reflect.DeepEqual(val, test.out) {
- t.Errorf("scanning %q: expected %v got %v, type %T", test.text, test.out, val, val)
+ t.Errorf("%s scanning %q: expected %v got %v, type %T", name, test.text, test.out, val, val)
}
}
}
func TestScan(t *testing.T) {
- testScan(t, Fscan)
+ testScan("StringReader", t, Fscan)
+}
+
+func TestMyReaderScan(t *testing.T) {
+ testScan("myStringReader", t, Fscan)
}
func TestScanln(t *testing.T) {
- testScan(t, Fscanln)
+ testScan("StringReader", t, Fscanln)
+}
+
+func TestMyReaderScanln(t *testing.T) {
+ testScan("myStringReader", t, Fscanln)
}
func TestScanf(t *testing.T) {
// TODO: there's no conversion from []T to ...T, but we can fake it. These
// functions do the faking. We index the table by the length of the param list.
-var scanf = []func(string, string, []interface{}) (int, os.Error){
- 0: func(s, f string, i []interface{}) (int, os.Error) { return Sscanf(s, f) },
- 1: func(s, f string, i []interface{}) (int, os.Error) { return Sscanf(s, f, i[0]) },
- 2: func(s, f string, i []interface{}) (int, os.Error) { return Sscanf(s, f, i[0], i[1]) },
- 3: func(s, f string, i []interface{}) (int, os.Error) { return Sscanf(s, f, i[0], i[1], i[2]) },
+var fscanf = []func(io.Reader, string, []interface{}) (int, os.Error){
+ 0: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f) },
+ 1: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f, i[0]) },
+ 2: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f, i[0], i[1]) },
+ 3: func(r io.Reader, f string, i []interface{}) (int, os.Error) { return Fscanf(r, f, i[0], i[1], i[2]) },
}
-func TestScanfMulti(t *testing.T) {
+func testScanfMulti(name string, t *testing.T) {
sliceType := reflect.Typeof(make([]interface{}, 1)).(*reflect.SliceType)
for _, test := range multiTests {
- n, err := scanf[len(test.in)](test.text, test.format, test.in)
+ var r io.Reader
+ if name == "StringReader" {
+ r = strings.NewReader(test.text)
+ } else {
+ r = newReader(test.text)
+ }
+ n, err := fscanf[len(test.in)](r, test.format, test.in)
if err != nil {
if test.err == "" {
t.Errorf("got error scanning (%q, %q): %q", test.format, test.text, err)
}
}
+func TestScanfMulti(t *testing.T) {
+ testScanfMulti("StringReader", t)
+}
+
+func TestMyReaderScanfMulti(t *testing.T) {
+ testScanfMulti("myStringReader", t)
+}
+
func TestScanMultiple(t *testing.T) {
var a int
var s string