From: Robert Griesemer Date: Tue, 14 Aug 2012 18:26:30 +0000 (-0700) Subject: go/scanner: faster scanning X-Git-Tag: go1.1rc2~2651 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3df0545a8b0f3ae1b7638474c986142fa9462c93;p=gostls13.git go/scanner: faster scanning Optimize some common cases. benchmark old ns/op new ns/op delta BenchmarkScanFile 718907 667960 -7.09% benchmark old MB/s new MB/s speedup BenchmarkScanFile 23.03 25.51 1.11x R=r CC=golang-dev https://golang.org/cl/6454150 --- diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index bb1dba0eb0..f1e18111a1 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -572,14 +572,25 @@ scanAgain: // determine token value insertSemi := false switch ch := s.ch; { - case isLetter(ch): + case 'a' <= ch && ch <= 'z': + // literals start with a lower-case letter lit = s.scanIdentifier() - tok = token.Lookup(lit) - switch tok { - case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN: + if len(lit) > 1 { + // keywords are longer than one letter - avoid lookup otherwise + tok = token.Lookup(lit) + switch tok { + case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN: + insertSemi = true + } + } else { insertSemi = true + tok = token.IDENT } - case digitVal(ch) < 10: + case 'A' <= ch && ch <= 'Z' || ch == '_': + insertSemi = true + tok = token.IDENT + lit = s.scanIdentifier() + case '0' <= ch && ch <= '9': insertSemi = true tok, lit = s.scanNumber(false) default: @@ -612,7 +623,7 @@ scanAgain: case ':': tok = s.switch2(token.COLON, token.DEFINE) case '.': - if digitVal(s.ch) < 10 { + if '0' <= s.ch && s.ch <= '9' { insertSemi = true tok, lit = s.scanNumber(true) } else if s.ch == '.' { @@ -704,10 +715,17 @@ scanAgain: case '|': tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR) default: - s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch)) - insertSemi = s.insertSemi // preserve insertSemi info - tok = token.ILLEGAL - lit = string(ch) + if isLetter(ch) { + // handle any letters we might have missed + insertSemi = true + tok = token.IDENT + s.scanIdentifier() + } else { + s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch)) + insertSemi = s.insertSemi // preserve insertSemi info + tok = token.ILLEGAL + lit = string(ch) + } } } if s.mode&dontInsertSemis == 0 { diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index a2eb0865ee..a9a16a85b7 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -6,6 +6,7 @@ package scanner import ( "go/token" + "io/ioutil" "os" "path/filepath" "runtime" @@ -705,7 +706,7 @@ func BenchmarkScan(b *testing.B) { file := fset.AddFile("", fset.Base(), len(source)) var s Scanner b.StartTimer() - for i := b.N - 1; i >= 0; i-- { + for i := 0; i < b.N; i++ { s.Init(file, source, nil, ScanComments) for { _, tok, _ := s.Scan() @@ -715,3 +716,26 @@ func BenchmarkScan(b *testing.B) { } } } + +func BenchmarkScanFile(b *testing.B) { + b.StopTimer() + const filename = "scanner.go" + src, err := ioutil.ReadFile(filename) + if err != nil { + panic(err) + } + fset := token.NewFileSet() + file := fset.AddFile(filename, fset.Base(), len(src)) + b.SetBytes(int64(len(src))) + var s Scanner + b.StartTimer() + for i := 0; i < b.N; i++ { + s.Init(file, src, nil, ScanComments) + for { + _, tok, _ := s.Scan() + if tok == token.EOF { + break + } + } + } +}