From: Rob Pike Date: Tue, 20 Jul 2010 07:03:59 +0000 (-0700) Subject: strings: add Title X-Git-Tag: weekly.2010-07-29~71 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8684a089896bffb5ed0818bc39d7ab7c2cf0e2fe;p=gostls13.git strings: add Title strings.ToTitle converts all characters to title case, which for consistency with the other To* functions it should continue to do. This CL adds string.Title, which does a proper title-casing of the string. A similar function for package bytes will follow once this is settled. Fixes #933. R=rsc CC=golang-dev https://golang.org/cl/1869042 --- diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index 5d3d61e195..594f03527e 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -333,6 +333,52 @@ func ToTitleSpecial(_case unicode.SpecialCase, s string) string { return Map(func(r int) int { return _case.ToTitle(r) }, s) } +// isSeparator reports whether the rune could mark a word boundary. +// TODO: update when package unicode captures more of the properties. +func isSeparator(rune int) bool { + // ASCII alphanumerics and underscore are not separators + if rune <= 0x7F { + switch { + case '0' <= rune && rune <= '9': + return false + case 'a' <= rune && rune <= 'z': + return false + case 'A' <= rune && rune <= 'Z': + return false + case rune == '_': + return false + } + return true + } + // Letters and digits are not separators + if unicode.IsLetter(rune) || unicode.IsDigit(rune) { + return false + } + // Otherwise, all we can do for now is treat spaces as separators. + return unicode.IsSpace(rune) +} + +// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly. + +// Title returns a copy of the string s with all Unicode letters that begin words +// mapped to their title case. +func Title(s string) string { + // Use a closure here to remember state. + // Hackish but effective. Depends on Map scanning in order and calling + // the closure once per rune. + prev := ' ' + return Map( + func(r int) int { + if isSeparator(prev) { + prev = r + return unicode.ToTitle(r) + } + prev = r + return r + }, + s) +} + // TrimLeftFunc returns a slice of the string s with all leading // Unicode code points c satisfying f(c) removed. func TrimLeftFunc(s string, f func(r int) bool) string { diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go index 06f1f1de1d..8b0c98113a 100644 --- a/src/pkg/strings/strings_test.go +++ b/src/pkg/strings/strings_test.go @@ -741,3 +741,25 @@ func TestReplace(t *testing.T) { } } } + +type TitleTest struct { + in, out string +} + +var TitleTests = []TitleTest{ + TitleTest{"", ""}, + TitleTest{"a", "A"}, + TitleTest{" aaa aaa aaa ", " Aaa Aaa Aaa "}, + TitleTest{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "}, + TitleTest{"123a456", "123a456"}, + TitleTest{"double-blind", "Double-Blind"}, + TitleTest{"ÿøû", "Ÿøû"}, +} + +func TestTitle(t *testing.T) { + for _, tt := range TitleTests { + if s := Title(tt.in); s != tt.out { + t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out) + } + } +}