bytes: add Title

author Rob Pike <r@golang.org>

Wed, 21 Jul 2010 02:53:59 +0000 (19:53 -0700)

committer Rob Pike <r@golang.org>

Wed, 21 Jul 2010 02:53:59 +0000 (19:53 -0700)
author Rob Pike <r@golang.org>
Wed, 21 Jul 2010 02:53:59 +0000 (19:53 -0700)
committer Rob Pike <r@golang.org>
Wed, 21 Jul 2010 02:53:59 +0000 (19:53 -0700)
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go

index bcf7b8609b34d7453082cb057b67358e0b97000c..25a5738324cacb48d2c30dbc894ef406f3b52fbf 100644 (file)
--- a/src/pkg/bytes/bytes.go
+++ b/src/pkg/bytes/bytes.go
@@ -332,6 +332,52 @@ func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
  // ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
  func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
  
+// isSeparator reports whether rune could mark a word boundary; Title title-cases the rune that follows one.
+// TODO: update when package unicode captures more of the properties.
+func isSeparator(rune int) bool {
+       // ASCII alphanumerics and underscore are not separators; every other value in this range is.
+       if rune <= 0x7F {
+               switch {
+               case '0' <= rune && rune <= '9':
+                       return false
+               case 'a' <= rune && rune <= 'z':
+                       return false
+               case 'A' <= rune && rune <= 'Z':
+                       return false
+               case rune == '_':
+                       return false
+               }
+               return true // anything else at or below 0x7F (space, '-', other punctuation, ...)
+       }
+       // Beyond ASCII, letters and digits are not separators.
+       if unicode.IsLetter(rune) || unicode.IsDigit(rune) {
+               return false
+       }
+       // Otherwise, all we can do for now is treat spaces as separators,
+       return unicode.IsSpace(rune) // so non-ASCII punctuation is reported as a non-separator.
+}
+
+// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
+
+// Title returns a copy of s with all Unicode letters that begin words
+// mapped to their title case. A word begins after any rune isSeparator accepts.
+func Title(s []byte) []byte {
+       // Use a closure here to remember state.
+       // Hackish but effective. Depends on Map scanning in order and calling
+       // the closure once per rune.
+       prev := ' ' // start as if a space preceded s, so the first rune begins a word
+       return Map(
+               func(r int) int {
+                       if isSeparator(prev) {
+                               prev = r // remember the unmapped rune for the next call
+                               return unicode.ToTitle(r)
+                       }
+                       prev = r
+                       return r
+               },
+               s)
+}
+
  // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8 encoded
  // Unicode code points c that satisfy f(c).
  func TrimLeftFunc(s []byte, f func(r int) bool) []byte {
diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go

index 8197543dcfba5553c3edef7c2cfe27c9988ba712..de5edd120fc22bd039989431cd91882a1aaad06f 100644 (file)
--- a/src/pkg/bytes/bytes_test.go
+++ b/src/pkg/bytes/bytes_test.go
@@ -685,3 +685,25 @@ func TestReplace(t *testing.T) {
                 }
         }
  }
+
+type TitleTest struct { // one table entry for TestTitle
+       in, out string // in is passed to Title; out is the expected result
+}
+
+var TitleTests = []TitleTest{ // cases checked by TestTitle
+       TitleTest{"", ""},
+       TitleTest{"a", "A"},
+       TitleTest{" aaa aaa aaa ", " Aaa Aaa Aaa "},
+       TitleTest{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
+       TitleTest{"123a456", "123a456"}, // digits are not separators, so the 'a' stays lower case
+       TitleTest{"double-blind", "Double-Blind"}, // ASCII '-' counts as a separator
+       TitleTest{"ÿøû", "Ÿøû"}, // non-ASCII letters: only the word-initial one is title-cased
+}
+
+func TestTitle(t *testing.T) { // runs Title over every entry in TitleTests
+       for _, tt := range TitleTests {
+               if s := string(Title([]byte(tt.in))); s != tt.out { // convert to []byte and back so results compare as strings
+                       t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
+               }
+       }
+}
author	Rob Pike <r@golang.org>
	Wed, 21 Jul 2010 02:53:59 +0000 (19:53 -0700)
committer	Rob Pike <r@golang.org>
	Wed, 21 Jul 2010 02:53:59 +0000 (19:53 -0700)
src/pkg/bytes/bytes.go		patch \| blob \| history
src/pkg/bytes/bytes_test.go		patch \| blob \| history