]> Cypherpunks repositories - gostls13.git/commitdiff
utf8: add Valid and ValidString
authorBrad Fitzpatrick <bradfitz@golang.org>
Fri, 7 Oct 2011 05:47:24 +0000 (22:47 -0700)
committerBrad Fitzpatrick <bradfitz@golang.org>
Fri, 7 Oct 2011 05:47:24 +0000 (22:47 -0700)
R=r, rsc, alex.brainman
CC=golang-dev
https://golang.org/cl/5234041

src/pkg/utf8/utf8.go
src/pkg/utf8/utf8_test.go

index 8910e17d7705b89e751695a3809a35f47751724d..3cd919d1d98e4a6c2b8bc1010e385a3a6b98faca 100644 (file)
@@ -354,3 +354,40 @@ func RuneCountInString(s string) (n int) {
 // an encoded rune.  Second and subsequent bytes always have the top
 // two bits set to 10.
 func RuneStart(b byte) bool { return b&0xC0 != 0x80 }
+
+// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
+func Valid(p []byte) bool {
+       i := 0
+       for i < len(p) {
+               if p[i] < RuneSelf {
+                       i++
+               } else {
+                       _, size := DecodeRune(p[i:])
+                       if size == 1 {
+                               // All valid runes of size of 1 (those
+                               // below RuneSelf) were handled above.
+                               // This must be a RuneError.
+                               return false
+                       }
+                       i += size
+               }
+       }
+       return true
+}
+
+// ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
+func ValidString(s string) bool {
+       for i, r := range s {
+               if r == RuneError {
+                       // The RuneError value can be an error
+                       // sentinel value (if it's size 1) or the same
+                       // value encoded properly. Decode it to see if
+                       // it's the 1 byte sentinel value.
+                       _, size := DecodeRuneInString(s[i:])
+                       if size == 1 {
+                               return false
+                       }
+               }
+       }
+       return true
+}
index 7a1db93e550d7f5510836c7c15ba511f7fee20c4..6cbbebc1a37f4b27c8109de7ac986cdc2faa98da 100644 (file)
@@ -274,6 +274,35 @@ func TestRuneCount(t *testing.T) {
        }
 }
 
+type ValidTest struct {
+       in  string
+       out bool
+}
+
+var validTests = []ValidTest{
+       {"", true},
+       {"a", true},
+       {"abc", true},
+       {"Ж", true},
+       {"ЖЖ", true},
+       {"брэд-ЛГТМ", true},
+       {"☺☻☹", true},
+       {string([]byte{66, 250}), false},
+       {string([]byte{66, 250, 67}), false},
+       {"a\uFFFDb", true},
+}
+
+func TestValid(t *testing.T) {
+       for i, tt := range validTests {
+               if Valid([]byte(tt.in)) != tt.out {
+                       t.Errorf("%d. Valid(%q) = %v; want %v", i, tt.in, !tt.out, tt.out)
+               }
+               if ValidString(tt.in) != tt.out {
+                       t.Errorf("%d. ValidString(%q) = %v; want %v", i, tt.in, !tt.out, tt.out)
+               }
+       }
+}
+
 func BenchmarkRuneCountTenASCIIChars(b *testing.B) {
        for i := 0; i < b.N; i++ {
                RuneCountInString("0123456789")