From f2af0c178099c0d785efe3b6f5f2282277f9ee9c Mon Sep 17 00:00:00 2001 From: Tim Cooper Date: Wed, 27 Sep 2017 21:44:38 -0300 Subject: [PATCH] bytes: explicitly state if a function expects UTF-8-encoded data Fixes #21950 Change-Id: I6fa392abd2c3bf6a4f80f14c6b1419470e9a944d Reviewed-on: https://go-review.googlesource.com/66750 Run-TryBot: Rob Pike TryBot-Result: Gobot Gobot Reviewed-by: Rob Pike --- src/bytes/bytes.go | 52 ++++++++++++++++++++------------------ src/bytes/bytes_amd64.go | 2 +- src/bytes/bytes_generic.go | 2 +- src/bytes/bytes_s390x.go | 2 +- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index fe7c09fa9b..286cd4454d 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -68,12 +68,12 @@ func Contains(b, subslice []byte) bool { return Index(b, subslice) != -1 } -// ContainsAny reports whether any of the UTF-8-encoded Unicode code points in chars are within b. +// ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. func ContainsAny(b []byte, chars string) bool { return IndexAny(b, chars) >= 0 } -// ContainsRune reports whether the Unicode code point r is within b. +// ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. func ContainsRune(b []byte, r rune) bool { return IndexRune(b, r) >= 0 } @@ -112,7 +112,7 @@ func LastIndexByte(s []byte, c byte) int { return -1 } -// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. +// IndexRune interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index of the first occurrence in s of the given rune. // It returns -1 if rune is not present in s. // If r is utf8.RuneError, it returns the first instance of any @@ -263,7 +263,8 @@ func SplitAfter(s, sep []byte) [][]byte { var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} -// Fields splits the slice s around each instance of one or more consecutive white space +// Fields interprets s as a sequence of UTF-8-encoded code points. +// It splits the slice s around each instance of one or more consecutive white space // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an // empty slice if s contains only white space. func Fields(s []byte) [][]byte { @@ -316,7 +317,7 @@ func Fields(s []byte) [][]byte { return a } -// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// FieldsFunc interprets s as a sequence of UTF-8-encoded code points. // It splits the slice s at each run of code points c satisfying f(c) and // returns a slice of subslices of s. If all code points in s satisfy f(c), or // len(s) == 0, an empty slice is returned. @@ -405,7 +406,7 @@ func HasSuffix(s, suffix []byte) bool { // Map returns a copy of the byte slice s with all its characters modified // according to the mapping function. If mapping returns a negative value, the character is // dropped from the string with no replacement. The characters in s and the -// output are interpreted as UTF-8-encoded Unicode code points. +// output are interpreted as UTF-8-encoded code points. func Map(mapping func(r rune) rune, s []byte) []byte { // In the worst case, the slice can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's @@ -463,28 +464,28 @@ func Repeat(b []byte, count int) []byte { return nb } -// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case. +// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case. func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } -// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case. +// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case. func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } -// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case. +// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } -// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // upper case, giving priority to the special casing rules. func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToUpper(r) }, s) } -// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // lower case, giving priority to the special casing rules. func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToLower(r) }, s) } -// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their +// ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their // title case, giving priority to the special casing rules. func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { return Map(func(r rune) rune { return c.ToTitle(r) }, s) @@ -515,8 +516,8 @@ func isSeparator(r rune) bool { return unicode.IsSpace(r) } -// Title returns a copy of s with all Unicode letters that begin words -// mapped to their title case. +// Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin +// words mapped to their title case. // // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. func Title(s []byte) []byte { @@ -536,8 +537,8 @@ func Title(s []byte) []byte { s) } -// TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded -// Unicode code points c that satisfy f(c). +// TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off +// all leading UTF-8-encoded code points c that satisfy f(c). func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { i := indexFunc(s, f, false) if i == -1 { @@ -546,8 +547,8 @@ func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { return s[i:] } -// TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 -// encoded Unicode code points c that satisfy f(c). +// TrimRightFunc returns a subslice of s by slicing off all trailing +// UTF-8-encoded code points c that satisfy f(c). func TrimRightFunc(s []byte, f func(r rune) bool) []byte { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { @@ -560,7 +561,7 @@ func TrimRightFunc(s []byte, f func(r rune) bool) []byte { } // TrimFunc returns a subslice of s by slicing off all leading and trailing -// UTF-8-encoded Unicode code points c that satisfy f(c). +// UTF-8-encoded code points c that satisfy f(c). func TrimFunc(s []byte, f func(r rune) bool) []byte { return TrimRightFunc(TrimLeftFunc(s, f), f) } @@ -583,14 +584,14 @@ func TrimSuffix(s, suffix []byte) []byte { return s } -// IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// IndexFunc interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index in s of the first Unicode // code point satisfying f(c), or -1 if none do. func IndexFunc(s []byte, f func(r rune) bool) int { return indexFunc(s, f, true) } -// LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. +// LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. // It returns the byte index in s of the last Unicode // code point satisfying f(c), or -1 if none do. func LastIndexFunc(s []byte, f func(r rune) bool) int { @@ -681,19 +682,19 @@ func makeCutsetFunc(cutset string) func(r rune) bool { } // Trim returns a subslice of s by slicing off all leading and -// trailing UTF-8-encoded Unicode code points contained in cutset. +// trailing UTF-8-encoded code points contained in cutset. func Trim(s []byte, cutset string) []byte { return TrimFunc(s, makeCutsetFunc(cutset)) } // TrimLeft returns a subslice of s by slicing off all leading -// UTF-8-encoded Unicode code points contained in cutset. +// UTF-8-encoded code points contained in cutset. func TrimLeft(s []byte, cutset string) []byte { return TrimLeftFunc(s, makeCutsetFunc(cutset)) } // TrimRight returns a subslice of s by slicing off all trailing -// UTF-8-encoded Unicode code points that are contained in cutset. +// UTF-8-encoded code points that are contained in cutset. func TrimRight(s []byte, cutset string) []byte { return TrimRightFunc(s, makeCutsetFunc(cutset)) } @@ -704,7 +705,8 @@ func TrimSpace(s []byte) []byte { return TrimFunc(s, unicode.IsSpace) } -// Runes returns a slice of runes (Unicode code points) equivalent to s. +// Runes interprets s as a sequence of UTF-8-encoded code points. +// It returns a slice of runes (Unicode code points) equivalent to s. func Runes(s []byte) []rune { t := make([]rune, utf8.RuneCount(s)) i := 0 diff --git a/src/bytes/bytes_amd64.go b/src/bytes/bytes_amd64.go index 77d5970152..88b0564db4 100644 --- a/src/bytes/bytes_amd64.go +++ b/src/bytes/bytes_amd64.go @@ -97,7 +97,7 @@ func Index(s, sep []byte) int { } // Count counts the number of non-overlapping instances of sep in s. -// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. +// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. func Count(s, sep []byte) int { if len(sep) == 1 && cpu.X86.HasPOPCNT { return countByte(s, sep[0]) diff --git a/src/bytes/bytes_generic.go b/src/bytes/bytes_generic.go index 98454bc121..32abd3b33f 100644 --- a/src/bytes/bytes_generic.go +++ b/src/bytes/bytes_generic.go @@ -41,7 +41,7 @@ func Index(s, sep []byte) int { } // Count counts the number of non-overlapping instances of sep in s. -// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. +// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. func Count(s, sep []byte) int { return countGeneric(s, sep) } diff --git a/src/bytes/bytes_s390x.go b/src/bytes/bytes_s390x.go index 68b57301fe..e25ca4b84e 100644 --- a/src/bytes/bytes_s390x.go +++ b/src/bytes/bytes_s390x.go @@ -98,7 +98,7 @@ func Index(s, sep []byte) int { } // Count counts the number of non-overlapping instances of sep in s. -// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. +// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. func Count(s, sep []byte) int { return countGeneric(s, sep) } -- 2.48.1