From: Russ Cox Date: Fri, 12 Sep 2008 23:12:20 +0000 (-0700) Subject: rudimentary string utilities. X-Git-Tag: weekly.2009-11-06~3220 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=729bc5c0c78d91435dd44bfcf40a8a2c631e14d2;p=gostls13.git rudimentary string utilities. R=r DELTA=314 (306 added, 8 deleted, 0 changed) OCL=15074 CL=15263 --- diff --git a/src/lib/make.bash b/src/lib/make.bash index b657a1f01b..cd5c0cd8d3 100755 --- a/src/lib/make.bash +++ b/src/lib/make.bash @@ -6,14 +6,6 @@ echo; echo; echo %%%% making lib %%%%; echo -rm -f *.6 -for i in fmt.go flag.go container/vector.go sort.go -do - base=$(basename $i .go) - echo 6g -o $GOROOT/pkg/$base.6 $i - 6g -o $GOROOT/pkg/$base.6 $i -done - for i in os math do echo; echo; echo %%%% making lib/$i %%%%; echo @@ -22,3 +14,11 @@ do cd .. done +rm -f *.6 +for i in fmt.go flag.go container/vector.go rand.go sort.go strings.go +do + base=$(basename $i .go) + echo 6g -o $GOROOT/pkg/$base.6 $i + 6g -o $GOROOT/pkg/$base.6 $i +done + diff --git a/src/lib/strings.go b/src/lib/strings.go new file mode 100644 index 0000000000..54aac30c96 --- /dev/null +++ b/src/lib/strings.go @@ -0,0 +1,198 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +// Count UTF-8 sequences in s. +// Assumes s is well-formed. +export func utflen(s string) int { + n := 0; + for i := 0; i < len(s); i++ { + if s[i]&0xC0 != 0x80 { + n++ + } + } + return n +} + +// Split string into array of UTF-8 sequences (still strings) +export func explode(s string) *[]string { + a := new([]string, utflen(s)); + j := 0; + for i := 0; i < len(a); i++ { + ej := j; + ej++; + for ej < len(s) && (s[ej]&0xC0) == 0x80 { + ej++ + } + a[i] = s[j:ej]; + j = ej + } + return a +} + +// Count non-overlapping instances of sep in s. +export func count(s, sep string) int { + if sep == "" { + return utflen(s)+1 + } + c := sep[0]; + n := 0; + for i := 0; i+len(sep) <= len(s); i++ { + if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { + n++; + i += len(sep)-1 + } + } + return n +} + +// Return index of first instance of sep in s. +export func index(s, sep string) int { + if sep == "" { + return 0 + } + c := sep[0]; + for i := 0; i+len(sep) <= len(s); i++ { + if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { + return i + } + } + return -1 +} + +// Split string into list of strings at separators +export func split(s, sep string) *[]string { + if sep == "" { + return explode(s) + } + c := sep[0]; + start := 0; + n := count(s, sep)+1; + a := new([]string, n); + na := 0; + for i := 0; i+len(sep) <= len(s); i++ { + if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { + a[na] = s[start:i]; + na++; + start = i+len(sep); + i += len(sep)-1 + } + } + a[na] = s[start:len(s)]; + return a +} + +// Join list of strings with separators between them. +export func join(a *[]string, sep string) string { + if len(a) == 0 { + return 0 + } + if len(a) == 1 { + return a[0] + } + n := len(sep) * (len(a)-1); + for i := 0; i < len(a); i++ { + n += len(a[i]) + } + + b := new([]byte, n); + bp := 0; + for i := 0; i < len(a); i++ { + s := a[i]; + for j := 0; j < len(s); j++ { + b[bp] = s[j]; + bp++ + } + if i + 1 < len(a) { + s = sep; + for j := 0; j < len(s); j++ { + b[bp] = s[j]; + bp++ + } + } + } + return string(b) +} + +// Convert decimal string to integer. +// TODO: Doesn't check for overflow. +export func atoi(s string) (i int, ok bool) { + // empty string bad + if len(s) == 0 { + return 0, false + } + + // pick off leading sign + neg := false; + if s[0] == '+' { + s = s[1:len(s)] + } else if s[0] == '-' { + neg = true; + s = s[1:len(s)] + } + + // empty string bad + if len(s) == 0 { + return 0, false + } + + // pick off zero + if s == "0" { + return 0, true + } + + // otherwise, leading zero bad + if s[0] == '0' { + return 0, false + } + + // parse number + n := 0; + for i := 0; i < len(s); i++ { + if s[i] < '0' || s[i] > '9' { + return 0, false + } + n = n*10 + int(s[i] - '0') + } + if neg { + n = -n + } + return n, true +} + +export func itoa(i int) string { + if i == 0 { + return "0" + } + + neg := false; // negative + bigneg := false; // largest negative number + if i < 0 { + neg = true; + i = -i; + if i < 0 { + bigneg = true; // is largest negative int + i-- // now positive + } + } + + // Assemble decimal in reverse order. + var b [32]byte; + bp := len(b); + for ; i > 0; i /= 10 { + bp--; + b[bp] = byte(i%10) + '0' + } + if neg { // add sign + bp--; + b[bp] = '-' + } + if bigneg { // account for i-- above + b[len(b)-1]++ + } + + // BUG return string(b[bp:len(b)]) + return string((&b)[bp:len(b)]) +} diff --git a/test/stringslib.go b/test/stringslib.go new file mode 100644 index 0000000000..a626370983 --- /dev/null +++ b/test/stringslib.go @@ -0,0 +1,108 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// $G $F.go && $L $F.$A && ./$A.out + +package main + +import strings "strings" + +func split(s, sep string) *[]string { + a := strings.split(s, sep); + b := strings.join(a, sep); + if b != s { + print("Split: ", s, " ", sep, " got ", len(a), "\n"); + for i := 0; i < len(a); i++ { + print(" a[", i, "] = ", a[i], "\n") + } + panic("split / join "+s+" "+sep) + } + return a +} + +func explode(s string) *[]string { + a := strings.explode(s); + b := strings.join(a, ""); + if b != s { + panic("explode / join "+s) + } + return a +} + +func itoa(i int) string { + s := strings.itoa(i); + n, ok := strings.atoi(s); + if n != i { + print("itoa: ", i, " ", s, "\n"); + panic("itoa") + } + return s +} + +func main() { + abcd := "abcd"; + faces := "☺☻☹"; + commas := "1,2,3,4"; + dots := "1....2....3....4"; + if strings.utflen(abcd) != 4 { panic("utflen abcd") } + if strings.utflen(faces) != 3 { panic("utflen faces") } + if strings.utflen(commas) != 7 { panic("utflen commas") } + { + a := split(abcd, "a"); + if len(a) != 2 || a[0] != "" || a[1] != "bcd" { panic("split abcd a") } + } + { + a := split(abcd, "z"); + if len(a) != 1 || a[0] != "abcd" { panic("split abcd z") } + } + { + a := split(abcd, ""); + if len(a) != 4 || a[0] != "a" || a[1] != "b" || a[2] != "c" || a[3] != "d" { panic("split abcd empty") } + } + { + a := explode(abcd); + if len(a) != 4 || a[0] != "a" || a[1] != "b" || a[2] != "c" || a[3] != "d" { panic("explode abcd") } + } + { + a := split(commas, ","); + if len(a) != 4 || a[0] != "1" || a[1] != "2" || a[2] != "3" || a[3] != "4" { panic("split commas") } + } + { + a := split(dots, "..."); + if len(a) != 4 || a[0] != "1" || a[1] != ".2" || a[2] != ".3" || a[3] != ".4" { panic("split dots") } + } + + { + a := split(faces, "☹"); + if len(a) != 2 || a[0] != "☺☻" || a[1] != "" { panic("split faces 1") } + } + { + a := split(faces, "~"); + if len(a) != 1 || a[0] != faces { panic("split faces ~") } + } + { + a := explode(faces); + if len(a) != 3 || a[0] != "☺" || a[1] != "☻" || a[2] != "☹" { panic("explode faces") } + } + { + a := split(faces, ""); + if len(a) != 3 || a[0] != "☺" || a[1] != "☻" || a[2] != "☹" { panic("split faces empty") } + } + + { + n, ok := strings.atoi("0"); if n != 0 || !ok { panic("atoi 0") } + n, ok = strings.atoi("-1"); if n != -1 || !ok { panic("atoi -1") } + n, ok = strings.atoi("+345"); if n != 345 || !ok { panic("atoi +345") } + n, ok = strings.atoi("9999"); if n != 9999 || !ok { panic("atoi 9999") } + n, ok = strings.atoi("20ba"); if n != 0 || ok { panic("atoi 20ba") } + n, ok = strings.atoi("hello"); if n != 0 || ok { panic("hello") } + } + + if itoa(0) != "0" { panic("itoa 0") } + if itoa(12345) != "12345" { panic("itoa 12345") } + if itoa(-1<<31) != "-2147483648" { panic("itoa 1<<31") } + + // should work if int == int64: is there some way to know? + // if itoa(-1<<63) != "-9223372036854775808" { panic("itoa 1<<63") } +}