bufio.6: io.dirinstall os.dirinstall
flag.6: fmt.dirinstall
testing.6: flag.install fmt.dirinstall
+strings.6: utf8.install
fmt.dirinstall: io.dirinstall reflect.dirinstall strconv.dirinstall
hash.dirinstall: os.dirinstall
package strings
-// Count UTF-8 sequences in s.
-// Assumes s is well-formed.
-export func utflen(s string) int {
- n := 0;
- for i := 0; i < len(s); i++ {
- if s[i]&0xC0 != 0x80 {
- n++
- }
- }
- return n
-}
+import "utf8"
// Split string into array of UTF-8 sequences (still strings)
export func explode(s string) *[]string {
- a := new([]string, utflen(s));
+ a := new([]string, utf8.RuneCountInString(s, 0, len(s)));
j := 0;
+ var size, rune int;
for i := 0; i < len(a); i++ {
- ej := j;
- ej++;
- for ej < len(s) && (s[ej]&0xC0) == 0x80 {
- ej++
- }
- a[i] = s[j:ej];
- j = ej
+ rune, size = utf8.DecodeRuneInString(s, j);
+ a[i] = string(rune);
+ j += size;
}
return a
}
// Count non-overlapping instances of sep in s.
export func count(s, sep string) int {
if sep == "" {
- return utflen(s)+1
+ return utf8.RuneCountInString(s, 0, len(s))+1
}
c := sep[0];
n := 0;
a[na] = s[start:len(s)];
return a
}
-
+
// Join list of strings with separators between them.
export func join(a *[]string, sep string) string {
if len(a) == 0 {
}
}
-// TODO: utflen shouldn't even be in strings.
-type UtflenTest struct {
- in string;
- out int;
-}
-var utflentests = []UtflenTest {
- UtflenTest{ abcd, 4 },
- UtflenTest{ faces, 3 },
- UtflenTest{ commas, 7 },
-}
-export func TestUtflen(t *testing.T) {
- for i := 0; i < len(utflentests); i++ {
- tt := utflentests[i];
- if out := strings.utflen(tt.in); out != tt.out {
- t.Errorf("utflen(%q) = %d, want %d", tt.in, out, tt.out);
- }
- }
-}
return RuneError, 1, false
}
-func DecodeRuneInStringInternal(s string, i int) (rune, size int, short bool) {
- n := len(s) - i;
+func DecodeRuneInStringInternal(s string, i int, n int) (rune, size int, short bool) {
if n < 1 {
return RuneError, 0, true;
}
}
export func FullRuneInString(s string, i int) bool {
- rune, size, short := DecodeRuneInStringInternal(s, i);
+ rune, size, short := DecodeRuneInStringInternal(s, i, len(s) - i);
return !short
}
export func DecodeRuneInString(s string, i int) (rune, size int) {
var short bool;
- rune, size, short = DecodeRuneInStringInternal(s, i);
+ rune, size, short = DecodeRuneInStringInternal(s, i, len(s) - i);
return;
}
return 4;
}
+export func RuneCount(p *[]byte) int {
+ i := 0;
+ var n int;
+ for n = 0; i < len(p); n++ {
+ if p[i] < RuneSelf {
+ i++;
+ } else {
+ rune, size := DecodeRune(p[i:len(p)]);
+ i += size;
+ }
+ }
+ return n;
+}
+
+export func RuneCountInString(s string, i int, l int) int {
+ ei := i + l;
+ n := 0;
+ for n = 0; i < ei; n++ {
+ if s[i] < RuneSelf {
+ i++;
+ } else {
+ rune, size, short := DecodeRuneInStringInternal(s, i, ei - i);
+ i += size;
+ }
+ }
+ return n;
+}
+
}
}
}
+
+type RuneCountTest struct {
+ in string;
+ out int;
+}
+var runecounttests = []RuneCountTest {
+ RuneCountTest{ "abcd", 4 },
+ RuneCountTest{ "☺☻☹", 3 },
+ RuneCountTest{ "1,2,3,4", 7 },
+ RuneCountTest{ "\xe2\x00", 2 },
+}
+export func TestRuneCount(t *testing.T) {
+ for i := 0; i < len(runecounttests); i++ {
+ tt := runecounttests[i];
+ if out := utf8.RuneCountInString(tt.in, 0, len(tt.in)); out != tt.out {
+ t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out);
+ }
+ if out := utf8.RuneCount(Bytes(tt.in)); out != tt.out {
+ t.Errorf("RuneCount(%q) = %d, want %d", tt.in, out, tt.out);
+ }
+ }
+}