From: Rob Pike <r@golang.org>
Date: Tue, 20 Jul 2010 07:03:59 +0000 (-0700)
Subject: strings: add Title
X-Git-Tag: weekly.2010-07-29~71
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8684a089896bffb5ed0818bc39d7ab7c2cf0e2fe;p=gostls13.git

strings: add Title
strings.ToTitle converts all characters to title case, which for consistency with the
other To* functions it should continue to do.  This CL adds string.Title, which
does a proper title-casing of the string.
A similar function for package bytes will follow once this is settled.
Fixes #933.

R=rsc
CC=golang-dev
https://golang.org/cl/1869042
---

diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go
index 5d3d61e195..594f03527e 100644
--- a/src/pkg/strings/strings.go
+++ b/src/pkg/strings/strings.go
@@ -333,6 +333,52 @@ func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
 	return Map(func(r int) int { return _case.ToTitle(r) }, s)
 }
 
+// isSeparator reports whether the rune could mark a word boundary.
+// TODO: update when package unicode captures more of the properties.
+func isSeparator(rune int) bool {
+	// ASCII alphanumerics and underscore are not separators
+	if rune <= 0x7F {
+		switch {
+		case '0' <= rune && rune <= '9':
+			return false
+		case 'a' <= rune && rune <= 'z':
+			return false
+		case 'A' <= rune && rune <= 'Z':
+			return false
+		case rune == '_':
+			return false
+		}
+		return true
+	}
+	// Letters and digits are not separators
+	if unicode.IsLetter(rune) || unicode.IsDigit(rune) {
+		return false
+	}
+	// Otherwise, all we can do for now is treat spaces as separators.
+	return unicode.IsSpace(rune)
+}
+
+// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
+
+// Title returns a copy of the string s with all Unicode letters that begin words
+// mapped to their title case.
+func Title(s string) string {
+	// Use a closure here to remember state.
+	// Hackish but effective. Depends on Map scanning in order and calling
+	// the closure once per rune.
+	prev := ' '
+	return Map(
+		func(r int) int {
+			if isSeparator(prev) {
+				prev = r
+				return unicode.ToTitle(r)
+			}
+			prev = r
+			return r
+		},
+		s)
+}
+
 // TrimLeftFunc returns a slice of the string s with all leading
 // Unicode code points c satisfying f(c) removed.
 func TrimLeftFunc(s string, f func(r int) bool) string {
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go
index 06f1f1de1d..8b0c98113a 100644
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@@ -741,3 +741,25 @@ func TestReplace(t *testing.T) {
 		}
 	}
 }
+
+type TitleTest struct {
+	in, out string
+}
+
+var TitleTests = []TitleTest{
+	TitleTest{"", ""},
+	TitleTest{"a", "A"},
+	TitleTest{" aaa aaa aaa ", " Aaa Aaa Aaa "},
+	TitleTest{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
+	TitleTest{"123a456", "123a456"},
+	TitleTest{"double-blind", "Double-Blind"},
+	TitleTest{"Ã¿Ã¸Ã»", "Å¸Ã¸Ã»"},
+}
+
+func TestTitle(t *testing.T) {
+	for _, tt := range TitleTests {
+		if s := Title(tt.in); s != tt.out {
+			t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
+		}
+	}
+}