strings: add Title

author Rob Pike <r@golang.org>

Tue, 20 Jul 2010 07:03:59 +0000 (00:03 -0700)

committer Rob Pike <r@golang.org>

Tue, 20 Jul 2010 07:03:59 +0000 (00:03 -0700)
author Rob Pike <r@golang.org>
Tue, 20 Jul 2010 07:03:59 +0000 (00:03 -0700)
committer Rob Pike <r@golang.org>
Tue, 20 Jul 2010 07:03:59 +0000 (00:03 -0700)
diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go

index 5d3d61e19516272c8a6c8b82efdab9c238dbaebe..594f03527e1e2ec968fd114b83a8c0459a3e2177 100644 (file)
--- a/src/pkg/strings/strings.go
+++ b/src/pkg/strings/strings.go
@@ -333,6 +333,52 @@ func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
         return Map(func(r int) int { return _case.ToTitle(r) }, s)
  }
  
+// isSeparator reports whether the rune could mark a word boundary: ASCII values other than alphanumerics and '_', and Unicode spaces, do.
+// TODO: update when package unicode captures more of the properties.
+func isSeparator(rune int) bool {
+       // Values up to 0x7F: alphanumerics and underscore are not separators; everything else in this range is.
+       if rune <= 0x7F {
+               switch {
+               case '0' <= rune && rune <= '9':
+                       return false
+               case 'a' <= rune && rune <= 'z':
+                       return false
+               case 'A' <= rune && rune <= 'Z':
+                       return false
+               case rune == '_':
+                       return false
+               }
+               return true
+       }
+       // Above 0x7F, letters and digits are not separators.
+       if unicode.IsLetter(rune) || unicode.IsDigit(rune) {
+               return false
+       }
+       // Otherwise, all we can do for now is treat spaces as separators; any other rune (e.g. punctuation) is reported as a non-separator.
+       return unicode.IsSpace(rune)
+}
+
+// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
+
+// Title returns a copy of the string s with all Unicode letters that begin words
+// mapped to their title case. A word begins at the start of s or after a rune for which isSeparator is true.
+func Title(s string) string {
+       // Use a closure here to remember the previous rune between calls.
+       // Hackish but effective. Depends on Map scanning in order and calling
+       // the closure once per rune. prev starts as ' ' so the first rune of s begins a word.
+       prev := ' '
+       return Map(
+               func(r int) int {
+                       if isSeparator(prev) {
+                               prev = r
+                               return unicode.ToTitle(r)
+                       }
+                       prev = r
+                       return r
+               },
+               s)
+}
+
  // TrimLeftFunc returns a slice of the string s with all leading
  // Unicode code points c satisfying f(c) removed.
  func TrimLeftFunc(s string, f func(r int) bool) string {
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go

index 06f1f1de1de89bae0b6c770791f2efcea6a99488..8b0c98113a68f3893921c2eda20c7f000e33485a 100644 (file)
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@@ -741,3 +741,25 @@ func TestReplace(t *testing.T) {
                 }
         }
  }
+
+type TitleTest struct { // one test case for Title
+       in, out string // in is passed to Title; out is the expected result
+}
+
+var TitleTests = []TitleTest{ // cases exercised by TestTitle
+       TitleTest{"", ""},
+       TitleTest{"a", "A"},
+       TitleTest{" aaa aaa aaa ", " Aaa Aaa Aaa "},
+       TitleTest{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "}, // already title-cased input is unchanged
+       TitleTest{"123a456", "123a456"}, // digits are not separators, so 'a' does not begin a word
+       TitleTest{"double-blind", "Double-Blind"}, // '-' is an ASCII separator
+       TitleTest{"ÿøû", "Ÿøû"}, // non-ASCII leading letter
+}
+
+func TestTitle(t *testing.T) { // checks Title against every entry in TitleTests
+       for _, tt := range TitleTests {
+               if s := Title(tt.in); s != tt.out {
+                       t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
+               }
+       }
+}
author	Rob Pike <r@golang.org>
	Tue, 20 Jul 2010 07:03:59 +0000 (00:03 -0700)
committer	Rob Pike <r@golang.org>
	Tue, 20 Jul 2010 07:03:59 +0000 (00:03 -0700)
src/pkg/strings/strings.go		patch \| blob \| history
src/pkg/strings/strings_test.go		patch \| blob \| history