Cypherpunks repositories - gostls13.git/commitdiff
gc: disallow NUL byte, catch more invalid UTF-8, test
author Russ Cox <rsc@golang.org>
Wed, 17 Feb 2010 00:47:39 +0000 (16:47 -0800)
committer Russ Cox <rsc@golang.org>
Wed, 17 Feb 2010 00:47:39 +0000 (16:47 -0800)
R=ken2, ken3
CC=golang-dev
https://golang.org/cl/209041

src/cmd/gc/lex.c
src/cmd/gc/subr.c
test/nul.go [new file with mode: 0644]

index 60c08ebb75ab62fed1f06d2d2083fbf0e5745548..6862774253e664ceb75cb908856bedf4990839ff 100644 (file)
@@ -520,18 +520,19 @@ l0:
                ncp = 8;
 
                for(;;) {
-                       if(clen == ncp) {
-                               cp = remal(cp, clen, ncp);
+                       if(clen+UTFmax > ncp) {
+                               cp = remal(cp, ncp, ncp);
                                ncp += ncp;
                        }
-                       c = getc();
+                       c = getr();
                        if(c == EOF) {
                                yyerror("eof in string");
                                break;
                        }
                        if(c == '`')
                                break;
-                       cp[clen++] = c;
+                       rune = c;
+                       clen += runetochar(cp+clen, &rune);
                }
 
        strlit:
@@ -821,28 +822,16 @@ talph:
         */
        for(;;) {
                if(c >= Runeself) {
-                       for(c1=0;;) {
-                               cp[c1++] = c;
-                               if(fullrune(cp, c1)) {
-                                       chartorune(&rune, cp);
-                                       if(isfrog(rune)) {
-                                               yyerror("illegal character 0x%ux", rune);
-                                               goto l0;
-                                       }
-                                       // 0xb7 · is used for internal names
-                                       if(!isalpharune(rune) && !isdigitrune(rune) && rune != 0xb7)
-                                               yyerror("invalid identifier character 0x%ux", rune);
-                                       break;
-                               }
-                               c = getc();
-                       }
-                       cp += c1;
-                       c = getc();
-                       continue;
-               }
-               if(!isalnum(c) && c != '_')
+                       ungetc(c);
+                       rune = getr();
+                       // 0xb7 · is used for internal names
+                       if(!isalpharune(rune) && !isdigitrune(rune) && rune != 0xb7)
+                               yyerror("invalid identifier character 0x%ux", rune);
+                       cp += runetochar(cp, &rune);
+               } else if(!isalnum(c) && c != '_')
                        break;
-               *cp++ = c;
+               else
+                       *cp++ = c;
                c = getc();
        }
        *cp = 0;
@@ -1054,8 +1043,10 @@ getc(void)
 
        switch(c) {
        case 0:
-               if(curio.bin != nil)
+               if(curio.bin != nil) {
+                       yyerror("illegal NUL byte");
                        break;
+               }
        case EOF:
                return EOF;
 
@@ -1097,10 +1088,11 @@ loop:
        c = chartorune(&rune, str);
        if(rune == Runeerror && c == 1) {
                lineno = lexlineno;
-               yyerror("illegal UTF-8 sequence in comment or string");
+               yyerror("illegal UTF-8 sequence");
                flusherrors();
+               print("\t");
                for(c=0; c<i; c++)
-                       print(" %.2x", *(uchar*)(str+c));
+                       print("%s%.2x", c > 0 ? " " : "", *(uchar*)(str+c));
                print("\n");
        }
        return rune;
@@ -1209,11 +1201,11 @@ oct:
                        l = l*8 + c-'0';
                        continue;
                }
-               yyerror("non-oct character in escape sequence: %c", c);
+               yyerror("non-octal character in escape sequence: %c", c);
                ungetc(c);
        }
        if(l > 255)
-               yyerror("oct escape value > 255: %d", l);
+               yyerror("octal escape value > 255: %d", l);
 
        *val = l;
        return 0;
index ee47cc8e1ad4d8e94e7f7d2b4f14a2cb000db897..7072d95e42b656ab2a26cef61c66f891eb34691a 100644 (file)
@@ -1525,6 +1525,7 @@ Zconv(Fmt *fp)
        Rune r;
        Strlit *sp;
        char *s, *se;
+       int n;
 
        sp = va_arg(fp->args, Strlit*);
        if(sp == nil)
@@ -1533,8 +1534,15 @@ Zconv(Fmt *fp)
        s = sp->s;
        se = s + sp->len;
        while(s < se) {
-               s += chartorune(&r, s);
+               n = chartorune(&r, s);
+               s += n;
                switch(r) {
+               case Runeerror:
+                       if(n == 1) {
+                               fmtprint(fp, "\\x%02x", *(s-1));
+                               break;
+                       }
+                       // fall through
                default:
                        if(r < ' ') {
                                fmtprint(fp, "\\x%02x", r);
diff --git a/test/nul.go b/test/nul.go
new file mode 100644 (file)
index 0000000..026d397
--- /dev/null
@@ -0,0 +1,58 @@
+// $G $D/$F.go && $L $F.$A && ./$A.out >tmp.go &&
+// errchk $G -e tmp.go
+// rm -f tmp.go
+
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test source files and strings containing NUL and invalid UTF-8.
+
+package main
+
+import (
+       "fmt"
+       "os"
+)
+
+func main() {
+       var s = "\xc2\xff"
+       var t = "\xd0\xfe"
+       var u = "\xab\x00\xfc"
+
+       if len(s) != 2 || s[0] != 0xc2 || s[1] != 0xff ||
+               len(t) != 2 || t[0] != 0xd0 || t[1] != 0xfe ||
+               len(u) != 3 || u[0] != 0xab || u[1] != 0x00 || u[2] != 0xfc {
+               println("BUG: non-UTF-8 string mangled");
+               os.Exit(2)
+       }
+
+       fmt.Print(`
+package main
+
+var x = "in string ` + "\x00" + `"     // ERROR "NUL"
+
+var y = ` + "`in raw string \x00 foo`" + `  // ERROR "NUL"
+
+// in comment ` + "\x00" + `  // ERROR "NUL"
+
+/* in other comment ` + "\x00" + ` */ // ERROR "NUL"
+
+/* in source code */ ` + "\x00" + `// ERROR "NUL"
+
+var xx = "in string ` + "\xc2\xff" + `" // ERROR "UTF-8"
+
+var yy = ` + "`in raw string \xff foo`" + `  // ERROR "UTF-8"
+
+// in comment ` + "\xe2\x80\x01" + `  // ERROR "UTF-8"
+
+/* in other comment ` + "\xe0\x00\x00" + ` */ // ERROR "UTF-8"
+
+/* in variable name */
+var z` + "\xc1\x81" + ` int // ERROR "UTF-8"
+
+/* in source code */ ` + "\xc2A" + `// ERROR "UTF-8"
+
+`)
+}
+