]> Cypherpunks repositories - gostls13.git/commitdiff
add strconv.Unquote
authorRuss Cox <rsc@golang.org>
Mon, 13 Apr 2009 20:27:39 +0000 (13:27 -0700)
committerRuss Cox <rsc@golang.org>
Mon, 13 Apr 2009 20:27:39 +0000 (13:27 -0700)
R=r
DELTA=229  (227 added, 0 deleted, 2 changed)
OCL=27200
CL=27366

src/lib/strconv/quote.go
src/lib/strconv/quote_test.go

index 442821a53da40364a327d25d8e5f1f5211924cef..c06204013b9083984e504399cfff8a4e01b345ad 100644 (file)
@@ -5,6 +5,7 @@
 package strconv
 
 import (
+       "os";
        "utf8";
 )
 
@@ -15,6 +16,7 @@ const lowerhex = "0123456789abcdef"
 // sequences (\t, \n, \xFF, \u0100) for control characters
 // and non-ASCII characters.
 func Quote(s string) string {
+       // TODO(rsc): String accumulation could be more efficient.
        t := `"`;
        for i := 0; i < len(s); i++ {
                switch {
@@ -75,10 +77,153 @@ func Quote(s string) string {
 // a valid Go string literal if enclosed in backquotes.
 func CanBackquote(s string) bool {
        for i := 0; i < len(s); i++ {
-               if s[i] < ' ' || s[i] == '`' {
+               if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' {
                        return false;
                }
        }
        return true;
 }
 
+func unhex(b byte) (v int, ok bool) {
+       c := int(b);
+       switch {
+       case '0' <= c && c <= '9':
+               return c - '0', true;
+       case 'a' <= c && c <= 'f':
+               return c - 'a' + 10, true;
+       case 'A' <= c && c <= 'F':
+               return c - 'A' + 10, true;
+       }
+       return;
+}
+
+func unquoteChar(s string, i int, q byte) (t string, ii int, err *os.Error) {
+       err = os.EINVAL;  // assume error for easy return
+
+       // easy cases
+       switch c := s[i]; {
+       case c >= utf8.RuneSelf:
+               r, size := utf8.DecodeRuneInString(s, i);
+               return s[i:i+size], i+size, nil;
+       case c == q:
+               return;
+       case c != '\\':
+               return s[i:i+1], i+1, nil;
+       }
+
+       // hard case: c is backslash
+       if i+1 >= len(s) {
+               return;
+       }
+       c := s[i+1];
+       i += 2;
+
+       switch c {
+       case 'a':
+               return "\a", i, nil;
+       case 'b':
+               return "\b", i, nil;
+       case 'f':
+               return "\f", i, nil;
+       case 'n':
+               return "\n", i, nil;
+       case 'r':
+               return "\r", i, nil;
+       case 't':
+               return "\t", i, nil;
+       case 'v':
+               return "\v", i, nil;
+       case 'x', 'u', 'U':
+               n := 0;
+               switch c {
+               case 'x':
+                       n = 2;
+               case 'u':
+                       n = 4;
+               case 'U':
+                       n = 8;
+               }
+               v := 0;
+               for j := 0; j < n; j++ {
+                       if i+j >= len(s) {
+                               return;
+                       }
+                       x, ok := unhex(s[i+j]);
+                       if !ok {
+                               return;
+                       }
+                       v = v<<4 | x;
+               }
+               if c == 'x' {
+                       return string([]byte{byte(v)}), i+n, nil;
+               }
+               if v > utf8.RuneMax {
+                       return;
+               }
+               return string(v), i+n, nil;
+       case '0', '1', '2', '3', '4', '5', '6', '7':
+               v := 0;
+               i--;
+               for j := 0; j < 3; j++ {
+                       if i+j >= len(s) {
+                               return;
+                       }
+                       x := int(s[i+j]) - '0';
+                       if x < 0 || x > 7 {
+                               return;
+                       }
+                       v = (v<<3) | x;
+               }
+               if v > 255 {
+                       return;
+               }
+               return string(v), i+3, nil;
+                       
+       case '\\', q:
+               return string(c), i, nil;
+       }
+       return;
+}
+
+// Unquote interprets s as a single-quoted, double-quoted,
+// or backquoted Go string literal, returning the string value
+// that s quotes.  (If s is single-quoted, it would be a Go
+// character literal; Unquote returns the corresponding
+// one-character string.)
+func Unquote(s string) (t string, err *os.Error) {
+       err = os.EINVAL;  // assume error for easy return
+       n := len(s);
+       if n < 2 || s[0] != s[n-1] {
+               return;
+       }
+
+       switch s[0] {
+       case '`':
+               t := s[1:n-1];
+               return t, nil;
+
+       case '"', '\'':
+               // TODO(rsc): String accumulation could be more efficient.
+               t := "";
+               q := s[0];
+               var c string;
+               var err *os.Error;
+               for i := 1; i < n-1; {
+                       c, i, err = unquoteChar(s, i, q);
+                       if err != nil {
+                               return "", err;
+                       }
+                       t += c;
+                       if q == '\'' && i != n-1 {
+                               // single-quoted must be single character
+                               return;
+                       }
+                       if i > n-1 {
+                               // read too far
+                               return;
+                       }
+               }
+               return t, nil
+       }
+       return;
+}
index 8421fcde4973006e76d859beb6167b5fdb173bc0..0fc01ebae3bd63e56228aa3cf80df2a47ad9338d 100644 (file)
@@ -5,6 +5,7 @@
 package strconv
 
 import (
+       "os";
        "strconv";
        "testing";
 )
@@ -48,7 +49,7 @@ var canbackquotetests = []canBackquoteTest {
        canBackquoteTest{ string(6), false },
        canBackquoteTest{ string(7), false },
        canBackquoteTest{ string(8), false },
-       canBackquoteTest{ string(9), false },
+       canBackquoteTest{ string(9), true },    // \t
        canBackquoteTest{ string(10), false },
        canBackquoteTest{ string(11), false },
        canBackquoteTest{ string(12), false },
@@ -86,3 +87,84 @@ func TestCanBackquote(t *testing.T) {
                }
        }
 }
+
+var unquotetests = []quoteTest {
+       quoteTest{ `""`, "" },
+       quoteTest{ `"a"`, "a" },
+       quoteTest{ `"abc"`, "abc" },
+       quoteTest{ `"☺"`, "☺" },
+       quoteTest{ `"hello world"`, "hello world" },
+       quoteTest{ `"\xFF"`, "\xFF" },
+       quoteTest{ `"\377"`, "\377" },
+       quoteTest{ `"\u1234"`, "\u1234" },
+       quoteTest{ `"\U00010111"`, "\U00010111" },
+       quoteTest{ `"\U0001011111"`, "\U0001011111" },
+       quoteTest{ `"\a\b\f\n\r\t\v\\\""`, "\a\b\f\n\r\t\v\\\"" },
+       quoteTest{ `"'"`, "'" },
+
+       quoteTest{ `'a'`, "a" },
+       quoteTest{ `'☹'`, "☹" },
+       quoteTest{ `'\a'`, "\a" },
+       quoteTest{ `'\x10'`, "\x10" },
+       quoteTest{ `'\377'`, "\377" },
+       quoteTest{ `'\u1234'`, "\u1234" },
+       quoteTest{ `'\U00010111'`, "\U00010111" },
+       quoteTest{ `'\t'`, "\t" },
+       quoteTest{ `' '`, " " },
+       quoteTest{ `'\''`, "'" },
+       quoteTest{ `'"'`, "\"" },
+
+       quoteTest{ "``", `` },
+       quoteTest{ "`a`", `a` },
+       quoteTest{ "`abc`", `abc` },
+       quoteTest{ "`☺`", `☺` },
+       quoteTest{ "`hello world`", `hello world` },
+       quoteTest{ "`\\xFF`", `\xFF` },
+       quoteTest{ "`\\377`", `\377` },
+       quoteTest{ "`\\`", `\` },
+       quoteTest{ "`   `", `   ` },
+       quoteTest{ "` `", ` ` },
+}
+
+var misquoted = []string {
+       ``,
+       `"`,
+       `"a`,
+       `"'`,
+       `b"`,
+       `"\"`,
+       `'\'`,
+       `'ab'`,
+       `"\x1!"`,
+       `"\U12345678"`,
+       `"\z"`,
+       "`",
+       "`xxx",
+       "`\"",
+       `"\'"`,
+       `'\"'`,
+}
+
+func TestUnquote(t *testing.T) {
+       for i := 0; i < len(unquotetests); i++ {
+               tt := unquotetests[i];
+               if out, err := Unquote(tt.in); err != nil && out != tt.out {
+                       t.Errorf("Unquote(%s) = %q, %s want %q, nil", tt.in, out, err, tt.out);
+               }
+       }
+
+       // run the quote tests too, backward
+       for i := 0; i < len(quotetests); i++ {
+               tt := quotetests[i];
+               if in, err := Unquote(tt.out); in != tt.in {
+                       t.Errorf("Unquote(%s) = %q, %s, want %q, nil", tt.out, in, err, tt.in);
+               }
+       }
+
+       for i := 0; i < len(misquoted); i++ {
+               s := misquoted[i];
+               if out, err := Unquote(s); out != "" || err != os.EINVAL {
+                       t.Errorf("Unquote(%q) = %q, %s want %q, %s", s, out, err, "", os.EINVAL);
+               }
+       }
+}