go/scanner: strip CRs from raw literals

author Robert Griesemer <gri@golang.org>

Thu, 15 Dec 2011 18:51:32 +0000 (10:51 -0800)

committer Robert Griesemer <gri@golang.org>

Thu, 15 Dec 2011 18:51:32 +0000 (10:51 -0800)
author Robert Griesemer <gri@golang.org>
Thu, 15 Dec 2011 18:51:32 +0000 (10:51 -0800)
committer Robert Griesemer <gri@golang.org>
Thu, 15 Dec 2011 18:51:32 +0000 (10:51 -0800)
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go

index cef9c4865083fd333f1a9a888b240695a841b803..7fb0104e450a51d74df116cd347a137e0ccdbbb1 100644 (file)
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -426,13 +426,16 @@ func (S *Scanner) scanString() {
         S.next()
  }
  
-func (S *Scanner) scanRawString() {
+func (S *Scanner) scanRawString() (hasCR bool) {
         // '`' opening already consumed
         offs := S.offset - 1
  
         for S.ch != '`' {
                 ch := S.ch
                 S.next()
+               if ch == '\r' {
+                       hasCR = true
+               }
                 if ch < 0 {
                         S.error(offs, "string not terminated")
                         break
@@ -440,6 +443,7 @@ func (S *Scanner) scanRawString() {
         }
  
         S.next()
+       return
  }
  
  func (S *Scanner) skipWhitespace() {
@@ -490,6 +494,18 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
         return tok0
  }
  
+func stripCR(b []byte) []byte {
+       c := make([]byte, len(b))
+       i := 0
+       for _, ch := range b {
+               if ch != '\r' {
+                       c[i] = ch
+                       i++
+               }
+       }
+       return c[:i]
+}
+
  // Scan scans the next token and returns the token position,
  // the token, and the literal string corresponding to the
  // token. The source end is indicated by token.EOF.
@@ -518,6 +534,7 @@ scanAgain:
         insertSemi := false
         offs := S.offset
         tok := token.ILLEGAL
+       hasCR := false
  
         // determine token value
         switch ch := S.ch; {
@@ -556,7 +573,7 @@ scanAgain:
                 case '`':
                         insertSemi = true
                         tok = token.STRING
-                       S.scanRawString()
+                       hasCR = S.scanRawString()
                 case ':':
                         tok = S.switch2(token.COLON, token.DEFINE)
                 case '.':
@@ -663,5 +680,9 @@ scanAgain:
         // TODO(gri): The scanner API should change such that the literal string
         //            is only valid if an actual literal was scanned. This will
         //            permit a more efficient implementation.
-       return S.file.Pos(offs), tok, string(S.src[offs:S.offset])
+       lit := S.src[offs:S.offset]
+       if hasCR {
+               lit = stripCR(lit)
+       }
+       return S.file.Pos(offs), tok, string(lit)
  }
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go

index 7ed927a49fa976e605ee600d8877397186f4881b..dc8ab2a748a08a7826ccef1dd103ea1110a13779 100644 (file)
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -83,6 +83,8 @@ var tokens = [...]elt{
                 "`",
                 literal,
         },
+       {token.STRING, "`\r`", literal},
+       {token.STRING, "`foo\r\nbar`", literal},
  
         // Operators and delimiters
         {token.ADD, "+", operator},
@@ -239,8 +241,16 @@ func TestScan(t *testing.T) {
                 if tok != e.tok {
                         t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
                 }
-               if e.tok.IsLiteral() && lit != e.lit {
-                       t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
+               if e.tok.IsLiteral() {
+                       // no CRs in raw string literals
+                       elit := e.lit
+                       if elit[0] == '`' {
+                               elit = string(stripCR([]byte(elit)))
+                               epos.Offset += len(e.lit) - len(lit) // correct position
+                       }
+                       if lit != elit {
+                               t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
+                       }
                 }
                 if tokenclass(tok) != e.class {
                         t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
author	Robert Griesemer <gri@golang.org>
	Thu, 15 Dec 2011 18:51:32 +0000 (10:51 -0800)
committer	Robert Griesemer <gri@golang.org>
	Thu, 15 Dec 2011 18:51:32 +0000 (10:51 -0800)
src/pkg/go/scanner/scanner.go		patch | blob | history
src/pkg/go/scanner/scanner_test.go		patch | blob | history