publish strconv.UnquoteChar

author Russ Cox <rsc@golang.org>

Tue, 23 Jun 2009 23:44:01 +0000 (16:44 -0700)

committer Russ Cox <rsc@golang.org>

Tue, 23 Jun 2009 23:44:01 +0000 (16:44 -0700)
author Russ Cox <rsc@golang.org>
Tue, 23 Jun 2009 23:44:01 +0000 (16:44 -0700)
committer Russ Cox <rsc@golang.org>
Tue, 23 Jun 2009 23:44:01 +0000 (16:44 -0700)
diff --git a/src/pkg/strconv/quote.go b/src/pkg/strconv/quote.go

index 8d7900d1d76f666170478c5158b7f6ccb8310133..f970ef518920e3355a11587291dbe56e4b684744 100644 (file)
--- a/src/pkg/strconv/quote.go
+++ b/src/pkg/strconv/quote.go
@@ -97,22 +97,35 @@ func unhex(b byte) (v int, ok bool) {
         return;
  }
  
-func unquoteChar(s string, q byte) (t, ns string, err os.Error) {
-       err = os.EINVAL;  // assume error for easy return
-
+// UnquoteChar decodes the first character or byte in the escaped string
+// or character literal represented by the string s.
+// It returns four values: 
+// 1) value, the decoded Unicode code point or byte value;
+// 2) multibyte, a boolean indicating whether the decoded character
+//    requires a multibyte UTF-8 representation;
+// 3) tail, the remainder of the string after the character; and
+// 4) an error that will be nil if the character is syntactically valid.
+// The second argument, quote, specifies the type of literal being parsed
+// and therefore which escaped quote character is permitted.
+// If set to a single quote, it permits the sequence \' and disallows unescaped '.
+// If set to a double quote, it permits \" and disallows unescaped ".
+// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
+func UnquoteChar(s string, quote byte) (value int, multibyte bool, tail string, err os.Error) {
         // easy cases
         switch c := s[0]; {
+       case c == quote && (quote == '\'' || quote == '"'):
+               err = os.EINVAL;
+               return;
         case c >= utf8.RuneSelf:
                 r, size := utf8.DecodeRuneInString(s);
-               return s[0:size], s[size:len(s)], nil;
-       case c == q:
-               return;
+               return r, true, s[size:len(s)], nil;
         case c != '\\':
-               return s[0:1], s[1:len(s)], nil;
+               return int(s[0]), false, s[1:len(s)], nil;
         }
  
         // hard case: c is backslash
         if len(s) <= 1 {
+               err = os.EINVAL;
                 return;
         }
         c := s[1];
@@ -120,19 +133,19 @@ func unquoteChar(s string, q byte) (t, ns string, err os.Error) {
  
         switch c {
         case 'a':
-               return "\a", s, nil;
+               value = '\a';
         case 'b':
-               return "\b", s, nil;
+               value = '\b';
         case 'f':
-               return "\f", s, nil;
+               value = '\f';
         case 'n':
-               return "\n", s, nil;
+               value = '\n';
         case 'r':
-               return "\r", s, nil;
+               value = '\r';
         case 't':
-               return "\t", s, nil;
+               value = '\t';
         case 'v':
-               return "\v", s, nil;
+               value = '\v';
         case 'x', 'u', 'U':
                 n := 0;
                 switch c {
@@ -145,11 +158,13 @@ func unquoteChar(s string, q byte) (t, ns string, err os.Error) {
                 }
                 v := 0;
                 if len(s) < n {
+                       err = os.EINVAL;
                         return;
                 }
                 for j := 0; j < n; j++ {
                         x, ok := unhex(s[j]);
                         if !ok {
+                               err = os.EINVAL;
                                 return;
                         }
                         v = v<<4 | x;
@@ -157,15 +172,19 @@ func unquoteChar(s string, q byte) (t, ns string, err os.Error) {
                 s = s[n:len(s)];
                 if c == 'x' {
                         // single-byte string, possibly not UTF-8
-                       return string([]byte{byte(v)}), s, nil;
+                       value = v;
+                       break;
                 }
                 if v > utf8.RuneMax {
+                       err = os.EINVAL;
                         return;
                 }
-               return string(v), s, nil;
+               value = v;
+               multibyte = true;
         case '0', '1', '2', '3', '4', '5', '6', '7':
                 v := int(c) - '0';
                 if len(s) < 2 {
+                       err = os.EINVAL;
                         return;
                 }
                 for j := 0; j < 2; j++ {        // one digit already; two more
@@ -177,13 +196,23 @@ func unquoteChar(s string, q byte) (t, ns string, err os.Error) {
                 }
                 s = s[2:len(s)];
                 if v > 255 {
+                       err = os.EINVAL;
                         return;
                 }
-               return string(v), s, nil;
-
-       case '\\', q:
-               return string(c), s, nil;
+               value = v;
+       case '\\':
+               value = '\\';
+       case '\'', '"':
+               if c != quote {
+                       err = os.EINVAL;
+                       return;
+               }
+               value = int(c);
+       default:
+               err = os.EINVAL;
+               return;
         }
+       tail = s;
         return;
  }
  
@@ -212,14 +241,19 @@ func Unquote(s string) (t string, err os.Error) {
         }
  
         // TODO(rsc): String accumulation could be more efficient.
-       var c, tt string;
-       var err1 os.Error;
+       var tt string;
         for len(s) > 0 {
-               if c, s, err1 = unquoteChar(s, quote); err1 != nil {
+               c, multibyte, ss, err1 := UnquoteChar(s, quote);
+               if err1 != nil {
                         err = err1;
                         return;
                 }
-               tt += c;
+               s = ss;
+               if multibyte || c < utf8.RuneSelf {
+                       tt += string(c);
+               } else {
+                       tt += string([]byte{byte(c)});
+               }
                 if quote == '\'' && len(s) != 0 {
                         // single-quoted must be single character
                         return;
diff --git a/src/pkg/strconv/quote_test.go b/src/pkg/strconv/quote_test.go

index 0fc01ebae3bd63e56228aa3cf80df2a47ad9338d..0b35b20cc7a64501e23a60204712632cc3929923 100644 (file)
--- a/src/pkg/strconv/quote_test.go
+++ b/src/pkg/strconv/quote_test.go
@@ -149,7 +149,7 @@ func TestUnquote(t *testing.T) {
         for i := 0; i < len(unquotetests); i++ {
                 tt := unquotetests[i];
                 if out, err := Unquote(tt.in); err != nil && out != tt.out {
-                       t.Errorf("Unquote(%s) = %q, %s want %q, nil", tt.in, out, err, tt.out);
+                       t.Errorf("Unquote(%#q) = %q, %v want %q, nil", tt.in, out, err, tt.out);
                 }
         }
  
@@ -157,14 +157,14 @@ func TestUnquote(t *testing.T) {
         for i := 0; i < len(quotetests); i++ {
                 tt := quotetests[i];
                 if in, err := Unquote(tt.out); in != tt.in {
-                       t.Errorf("Unquote(%s) = %q, %s, want %q, nil", tt.out, in, err, tt.in);
+                       t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in);
                 }
         }
  
         for i := 0; i < len(misquoted); i++ {
                 s := misquoted[i];
                 if out, err := Unquote(s); out != "" || err != os.EINVAL {
-                       t.Errorf("Unquote(%q) = %q, %s want %q, %s", s, out, err, "", os.EINVAL);
+                       t.Errorf("Unquote(%#q) = %q, %v want %q, %v", s, out, err, "", os.EINVAL);
                 }
         }
  }
author	Russ Cox <rsc@golang.org>
	Tue, 23 Jun 2009 23:44:01 +0000 (16:44 -0700)
committer	Russ Cox <rsc@golang.org>
	Tue, 23 Jun 2009 23:44:01 +0000 (16:44 -0700)
src/pkg/strconv/quote.go		patch \| blob \| history
src/pkg/strconv/quote_test.go		patch \| blob \| history