bytes: explicitly state if a function expects UTF-8-encoded data

author Tim Cooper <tim.cooper@layeh.com>

Thu, 28 Sep 2017 00:44:38 +0000 (21:44 -0300)

committer Rob Pike <r@golang.org>

Mon, 2 Oct 2017 00:31:47 +0000 (00:31 +0000)
author Tim Cooper <tim.cooper@layeh.com>
Thu, 28 Sep 2017 00:44:38 +0000 (21:44 -0300)
committer Rob Pike <r@golang.org>
Mon, 2 Oct 2017 00:31:47 +0000 (00:31 +0000)
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go

index fe7c09fa9bb8f7ab13879243c32921f70a988314..286cd4454dcf72ea83ca1992d51f6aa999c7415a 100644 (file)
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -68,12 +68,12 @@ func Contains(b, subslice []byte) bool {
         return Index(b, subslice) != -1
  }
  
-// ContainsAny reports whether any of the UTF-8-encoded Unicode code points in chars are within b.
+// ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
  func ContainsAny(b []byte, chars string) bool {
         return IndexAny(b, chars) >= 0
  }
  
-// ContainsRune reports whether the Unicode code point r is within b.
+// ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
  func ContainsRune(b []byte, r rune) bool {
         return IndexRune(b, r) >= 0
  }
@@ -112,7 +112,7 @@ func LastIndexByte(s []byte, c byte) int {
         return -1
  }
  
-// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
+// IndexRune interprets s as a sequence of UTF-8-encoded code points.
  // It returns the byte index of the first occurrence in s of the given rune.
  // It returns -1 if rune is not present in s.
  // If r is utf8.RuneError, it returns the first instance of any
@@ -263,7 +263,8 @@ func SplitAfter(s, sep []byte) [][]byte {
  
  var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
  
-// Fields splits the slice s around each instance of one or more consecutive white space
+// Fields interprets s as a sequence of UTF-8-encoded code points.
+// It splits the slice s around each instance of one or more consecutive white space
  // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
  // empty slice if s contains only white space.
  func Fields(s []byte) [][]byte {
@@ -316,7 +317,7 @@ func Fields(s []byte) [][]byte {
         return a
  }
  
-// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
+// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
  // It splits the slice s at each run of code points c satisfying f(c) and
  // returns a slice of subslices of s. If all code points in s satisfy f(c), or
  // len(s) == 0, an empty slice is returned.
@@ -405,7 +406,7 @@ func HasSuffix(s, suffix []byte) bool {
  // Map returns a copy of the byte slice s with all its characters modified
  // according to the mapping function. If mapping returns a negative value, the character is
  // dropped from the string with no replacement. The characters in s and the
-// output are interpreted as UTF-8-encoded Unicode code points.
+// output are interpreted as UTF-8-encoded code points.
  func Map(mapping func(r rune) rune, s []byte) []byte {
         // In the worst case, the slice can grow when mapped, making
         // things unpleasant. But it's so rare we barge in assuming it's
@@ -463,28 +464,28 @@ func Repeat(b []byte, count int) []byte {
         return nb
  }
  
-// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
+// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
  func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
  
-// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
+// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
  func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
  
-// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
+// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
  func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
  
-// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
+// ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
  // upper case, giving priority to the special casing rules.
  func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
         return Map(func(r rune) rune { return c.ToUpper(r) }, s)
  }
  
-// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
+// ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
  // lower case, giving priority to the special casing rules.
  func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
         return Map(func(r rune) rune { return c.ToLower(r) }, s)
  }
  
-// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
+// ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
  // title case, giving priority to the special casing rules.
  func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
         return Map(func(r rune) rune { return c.ToTitle(r) }, s)
@@ -515,8 +516,8 @@ func isSeparator(r rune) bool {
         return unicode.IsSpace(r)
  }
  
-// Title returns a copy of s with all Unicode letters that begin words
-// mapped to their title case.
+// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
+// words mapped to their title case.
  //
  // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
  func Title(s []byte) []byte {
@@ -536,8 +537,8 @@ func Title(s []byte) []byte {
                 s)
  }
  
-// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded
-// Unicode code points c that satisfy f(c).
+// TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
+// all leading UTF-8-encoded code points c that satisfy f(c).
  func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
         i := indexFunc(s, f, false)
         if i == -1 {
@@ -546,8 +547,8 @@ func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
         return s[i:]
  }
  
-// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8
-// encoded Unicode code points c that satisfy f(c).
+// TrimRightFunc returns a subslice of s by slicing off all trailing
+// UTF-8-encoded code points c that satisfy f(c).
  func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
         i := lastIndexFunc(s, f, false)
         if i >= 0 && s[i] >= utf8.RuneSelf {
@@ -560,7 +561,7 @@ func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
  }
  
  // TrimFunc returns a subslice of s by slicing off all leading and trailing
-// UTF-8-encoded Unicode code points c that satisfy f(c).
+// UTF-8-encoded code points c that satisfy f(c).
  func TrimFunc(s []byte, f func(r rune) bool) []byte {
         return TrimRightFunc(TrimLeftFunc(s, f), f)
  }
@@ -583,14 +584,14 @@ func TrimSuffix(s, suffix []byte) []byte {
         return s
  }
  
-// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
+// IndexFunc interprets s as a sequence of UTF-8-encoded code points.
  // It returns the byte index in s of the first Unicode
  // code point satisfying f(c), or -1 if none do.
  func IndexFunc(s []byte, f func(r rune) bool) int {
         return indexFunc(s, f, true)
  }
  
-// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
+// LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
  // It returns the byte index in s of the last Unicode
  // code point satisfying f(c), or -1 if none do.
  func LastIndexFunc(s []byte, f func(r rune) bool) int {
@@ -681,19 +682,19 @@ func makeCutsetFunc(cutset string) func(r rune) bool {
  }
  
  // Trim returns a subslice of s by slicing off all leading and
-// trailing UTF-8-encoded Unicode code points contained in cutset.
+// trailing UTF-8-encoded code points contained in cutset.
  func Trim(s []byte, cutset string) []byte {
         return TrimFunc(s, makeCutsetFunc(cutset))
  }
  
  // TrimLeft returns a subslice of s by slicing off all leading
-// UTF-8-encoded Unicode code points contained in cutset.
+// UTF-8-encoded code points contained in cutset.
  func TrimLeft(s []byte, cutset string) []byte {
         return TrimLeftFunc(s, makeCutsetFunc(cutset))
  }
  
  // TrimRight returns a subslice of s by slicing off all trailing
-// UTF-8-encoded Unicode code points that are contained in cutset.
+// UTF-8-encoded code points that are contained in cutset.
  func TrimRight(s []byte, cutset string) []byte {
         return TrimRightFunc(s, makeCutsetFunc(cutset))
  }
@@ -704,7 +705,8 @@ func TrimSpace(s []byte) []byte {
         return TrimFunc(s, unicode.IsSpace)
  }
  
-// Runes returns a slice of runes (Unicode code points) equivalent to s.
+// Runes interprets s as a sequence of UTF-8-encoded code points.
+// It returns a slice of runes (Unicode code points) equivalent to s.
  func Runes(s []byte) []rune {
         t := make([]rune, utf8.RuneCount(s))
         i := 0
diff --git a/src/bytes/bytes_amd64.go b/src/bytes/bytes_amd64.go

index 77d5970152a43b5ab03e1642eb393fbc3832d9dd..88b0564db4c8cd1498e820d54b3bb2999078773f 100644 (file)
--- a/src/bytes/bytes_amd64.go
+++ b/src/bytes/bytes_amd64.go
@@ -97,7 +97,7 @@ func Index(s, sep []byte) int {
  }
  
  // Count counts the number of non-overlapping instances of sep in s.
-// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
+// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
  func Count(s, sep []byte) int {
         if len(sep) == 1 && cpu.X86.HasPOPCNT {
                 return countByte(s, sep[0])
diff --git a/src/bytes/bytes_generic.go b/src/bytes/bytes_generic.go

index 98454bc121b8b3971ff70cfd420be7ab5c82bfbc..32abd3b33ff7d19173e61a60063c0fef0bbaa7ba 100644 (file)
--- a/src/bytes/bytes_generic.go
+++ b/src/bytes/bytes_generic.go
@@ -41,7 +41,7 @@ func Index(s, sep []byte) int {
  }
  
  // Count counts the number of non-overlapping instances of sep in s.
-// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
+// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
  func Count(s, sep []byte) int {
         return countGeneric(s, sep)
  }
diff --git a/src/bytes/bytes_s390x.go b/src/bytes/bytes_s390x.go

index 68b57301fe8e7b7b0bf248077a2f715c81f833a5..e25ca4b84e8f31fb4ddb8121aaf6a8a304624f9f 100644 (file)
--- a/src/bytes/bytes_s390x.go
+++ b/src/bytes/bytes_s390x.go
@@ -98,7 +98,7 @@ func Index(s, sep []byte) int {
  }
  
  // Count counts the number of non-overlapping instances of sep in s.
-// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
+// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
  func Count(s, sep []byte) int {
         return countGeneric(s, sep)
  }
author	Tim Cooper <tim.cooper@layeh.com>
	Thu, 28 Sep 2017 00:44:38 +0000 (21:44 -0300)
committer	Rob Pike <r@golang.org>
	Mon, 2 Oct 2017 00:31:47 +0000 (00:31 +0000)
src/bytes/bytes.go		patch \| blob \| history
src/bytes/bytes_amd64.go		patch \| blob \| history
src/bytes/bytes_generic.go		patch \| blob \| history
src/bytes/bytes_s390x.go		patch \| blob \| history