S.next()
}
-func (S *Scanner) scanRawString() {
+func (S *Scanner) scanRawString() (hasCR bool) {
// '`' opening already consumed
offs := S.offset - 1
for S.ch != '`' {
ch := S.ch
S.next()
+ if ch == '\r' {
+ hasCR = true
+ }
if ch < 0 {
S.error(offs, "string not terminated")
break
}
S.next()
+ return
}
func (S *Scanner) skipWhitespace() {
return tok0
}
// stripCR returns a newly allocated copy of b from which every carriage
// return ('\r') byte has been dropped. The input slice is left untouched.
func stripCR(b []byte) []byte {
	// Dropping bytes can only shrink the data, so len(b) is always enough room.
	out := make([]byte, len(b))
	n := 0
	for k := 0; k < len(b); k++ {
		if b[k] == '\r' {
			continue
		}
		out[n] = b[k]
		n++
	}
	// Trim the result to the bytes that were actually kept.
	return out[:n]
}
+
// Scan scans the next token and returns the token position,
// the token, and the literal string corresponding to the
// token. The source end is indicated by token.EOF.
insertSemi := false
offs := S.offset
tok := token.ILLEGAL
+ hasCR := false
// determine token value
switch ch := S.ch; {
case '`':
insertSemi = true
tok = token.STRING
- S.scanRawString()
+ hasCR = S.scanRawString()
case ':':
tok = S.switch2(token.COLON, token.DEFINE)
case '.':
// TODO(gri): The scanner API should change such that the literal string
// is only valid if an actual literal was scanned. This will
// permit a more efficient implementation.
- return S.file.Pos(offs), tok, string(S.src[offs:S.offset])
+ lit := S.src[offs:S.offset]
+ if hasCR {
+ lit = stripCR(lit)
+ }
+ return S.file.Pos(offs), tok, string(lit)
}
"`",
literal,
},
+ {token.STRING, "`\r`", literal},
+ {token.STRING, "`foo\r\nbar`", literal},
// Operators and delimiters
{token.ADD, "+", operator},
if tok != e.tok {
t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
}
- if e.tok.IsLiteral() && lit != e.lit {
- t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
+ if e.tok.IsLiteral() {
+ // no CRs in raw string literals
+ elit := e.lit
+ if elit[0] == '`' {
+ elit = string(stripCR([]byte(elit)))
+ epos.Offset += len(e.lit) - len(lit) // correct position
+ }
+ if lit != elit {
+ t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
+ }
}
if tokenclass(tok) != e.class {
t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)