]> Cypherpunks repositories - gostls13.git/commitdiff
go/scanner: faster scanning
authorRobert Griesemer <gri@golang.org>
Tue, 14 Aug 2012 18:26:30 +0000 (11:26 -0700)
committerRobert Griesemer <gri@golang.org>
Tue, 14 Aug 2012 18:26:30 +0000 (11:26 -0700)
Optimize some common cases.

benchmark            old ns/op    new ns/op    delta
BenchmarkScanFile       718907       667960   -7.09%

benchmark             old MB/s     new MB/s  speedup
BenchmarkScanFile        23.03        25.51    1.11x

R=r
CC=golang-dev
https://golang.org/cl/6454150

src/pkg/go/scanner/scanner.go
src/pkg/go/scanner/scanner_test.go

index bb1dba0eb08f01a4df240a9d40e5b13e6d6f481b..f1e18111a1458e56940d40081a8849c400754114 100644 (file)
@@ -572,14 +572,25 @@ scanAgain:
        // determine token value
        insertSemi := false
        switch ch := s.ch; {
-       case isLetter(ch):
+       case 'a' <= ch && ch <= 'z':
+               // literals start with a lower-case letter
                lit = s.scanIdentifier()
-               tok = token.Lookup(lit)
-               switch tok {
-               case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
+               if len(lit) > 1 {
+                       // keywords are longer than one letter - avoid lookup otherwise
+                       tok = token.Lookup(lit)
+                       switch tok {
+                       case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
+                               insertSemi = true
+                       }
+               } else {
                        insertSemi = true
+                       tok = token.IDENT
                }
-       case digitVal(ch) < 10:
+       case 'A' <= ch && ch <= 'Z' || ch == '_':
+               insertSemi = true
+               tok = token.IDENT
+               lit = s.scanIdentifier()
+       case '0' <= ch && ch <= '9':
                insertSemi = true
                tok, lit = s.scanNumber(false)
        default:
@@ -612,7 +623,7 @@ scanAgain:
                case ':':
                        tok = s.switch2(token.COLON, token.DEFINE)
                case '.':
-                       if digitVal(s.ch) < 10 {
+                       if '0' <= s.ch && s.ch <= '9' {
                                insertSemi = true
                                tok, lit = s.scanNumber(true)
                        } else if s.ch == '.' {
@@ -704,10 +715,17 @@ scanAgain:
                case '|':
                        tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
                default:
-                       s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
-                       insertSemi = s.insertSemi // preserve insertSemi info
-                       tok = token.ILLEGAL
-                       lit = string(ch)
+                       if isLetter(ch) {
+                               // handle any letters we might have missed
+                               insertSemi = true
+                               tok = token.IDENT
+                               s.scanIdentifier()
+                       } else {
+                               s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
+                               insertSemi = s.insertSemi // preserve insertSemi info
+                               tok = token.ILLEGAL
+                               lit = string(ch)
+                       }
                }
        }
        if s.mode&dontInsertSemis == 0 {
index a2eb0865ee35bf3c71b2876897a81291e6aac944..a9a16a85b79e450515ee2ce9ee3db00e92d7a07a 100644 (file)
@@ -6,6 +6,7 @@ package scanner
 
 import (
        "go/token"
+       "io/ioutil"
        "os"
        "path/filepath"
        "runtime"
@@ -705,7 +706,7 @@ func BenchmarkScan(b *testing.B) {
        file := fset.AddFile("", fset.Base(), len(source))
        var s Scanner
        b.StartTimer()
-       for i := b.N - 1; i >= 0; i-- {
+       for i := 0; i < b.N; i++ {
                s.Init(file, source, nil, ScanComments)
                for {
                        _, tok, _ := s.Scan()
@@ -715,3 +716,26 @@ func BenchmarkScan(b *testing.B) {
                }
        }
 }
+
+func BenchmarkScanFile(b *testing.B) {
+       b.StopTimer()
+       const filename = "scanner.go"
+       src, err := ioutil.ReadFile(filename)
+       if err != nil {
+               panic(err)
+       }
+       fset := token.NewFileSet()
+       file := fset.AddFile(filename, fset.Base(), len(src))
+       b.SetBytes(int64(len(src)))
+       var s Scanner
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               s.Init(file, src, nil, ScanComments)
+               for {
+                       _, tok, _ := s.Scan()
+                       if tok == token.EOF {
+                               break
+                       }
+               }
+       }
+}