// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of the given rune.
// It returns -1 if rune is not present in s.
+// If r is utf8.RuneError, it returns the first instance of any
+// invalid UTF-8 byte sequence.
func IndexRune(s []byte, r rune) int {
- if r < utf8.RuneSelf {
+ switch {
+ case 0 <= r && r < utf8.RuneSelf:
return IndexByte(s, byte(r))
+ case r == utf8.RuneError:
+ for i := 0; i < len(s); {
+ r1, n := utf8.DecodeRune(s[i:])
+ if r1 == utf8.RuneError {
+ return i
+ }
+ i += n
+ }
+ return -1
+ case !utf8.ValidRune(r):
+ return -1
+ default:
+ var b [utf8.UTFMax]byte
+ n := utf8.EncodeRune(b[:], r)
+ return Index(s, b[:n])
}
- var b [utf8.UTFMax]byte
- n := utf8.EncodeRune(b[:], r)
- return Index(s, b[:n])
}
// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
{dots + dots + dots, " ", -1},
}
-var indexRuneTests = []BinOpTest{
- {"", "a", -1},
- {"", "☺", -1},
- {"foo", "☹", -1},
- {"foo", "o", 1},
- {"foo☺bar", "☺", 3},
- {"foo☺☻☹bar", "☹", 9},
-}
-
// Execute f on each test case. funcName should be the name of f; it's used
// in failure reports.
func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, testCases []BinOpTest) {
}
func TestIndexRune(t *testing.T) {
- for _, tt := range indexRuneTests {
- a := []byte(tt.a)
- r, _ := utf8.DecodeRuneInString(tt.b)
- pos := IndexRune(a, r)
- if pos != tt.i {
- t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos)
+ tests := []struct {
+ in string
+ rune rune
+ want int
+ }{
+ {"", 'a', -1},
+ {"", '☺', -1},
+ {"foo", '☹', -1},
+ {"foo", 'o', 1},
+ {"foo☺bar", '☺', 3},
+ {"foo☺☻☹bar", '☹', 9},
+ {"a A x", 'A', 2},
+ {"some_text=some_value", '=', 9},
+ {"☺a", 'a', 3},
+ {"a☻☺b", '☺', 4},
+
+ // RuneError should match any invalid UTF-8 byte sequence.
+ {"�", '�', 0},
+ {"\xff", '�', 0},
+ {"☻x�", '�', len("☻x")},
+ {"☻x\xe2\x98", '�', len("☻x")},
+ {"☻x\xe2\x98�", '�', len("☻x")},
+ {"☻x\xe2\x98x", '�', len("☻x")},
+
+ // Invalid rune values should never match.
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+ }
+ for _, tt := range tests {
+ if got := IndexRune([]byte(tt.in), tt.rune); got != tt.want {
+ t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
}
}
haystack := []byte("test世界")
-
allocs := testing.AllocsPerRun(1000, func() {
if i := IndexRune(haystack, 's'); i != 2 {
t.Fatalf("'s' at %d; want 2", i)
}
})
if allocs != 0 {
- t.Errorf(`expected no allocations, got %f`, allocs)
+ t.Errorf("expected no allocations, got %f", allocs)
}
}
// IndexRune returns the index of the first instance of the Unicode code point
// r, or -1 if rune is not present in s.
+// If r is utf8.RuneError, it returns the first instance of any
+// invalid UTF-8 byte sequence.
func IndexRune(s string, r rune) int {
- if r < utf8.RuneSelf {
+ switch {
+ case 0 <= r && r < utf8.RuneSelf:
return IndexByte(s, byte(r))
+ case r == utf8.RuneError:
+ for i, r := range s {
+ if r == utf8.RuneError {
+ return i
+ }
+ }
+ return -1
+ case !utf8.ValidRune(r):
+ return -1
+ default:
+ return Index(s, string(r))
}
-
- return Index(s, string(r))
}
// IndexAny returns the index of the first instance of any Unicode code point
}
}
-var indexRuneTests = []struct {
- s string
- rune rune
- out int
-}{
- {"a A x", 'A', 2},
- {"some_text=some_value", '=', 9},
- {"☺a", 'a', 3},
- {"a☻☺b", '☺', 4},
-}
-
func TestIndexRune(t *testing.T) {
- for _, test := range indexRuneTests {
- if actual := IndexRune(test.s, test.rune); actual != test.out {
- t.Errorf("IndexRune(%q,%d)= %v; want %v", test.s, test.rune, actual, test.out)
+ tests := []struct {
+ in string
+ rune rune
+ want int
+ }{
+ {"", 'a', -1},
+ {"", '☺', -1},
+ {"foo", '☹', -1},
+ {"foo", 'o', 1},
+ {"foo☺bar", '☺', 3},
+ {"foo☺☻☹bar", '☹', 9},
+ {"a A x", 'A', 2},
+ {"some_text=some_value", '=', 9},
+ {"☺a", 'a', 3},
+ {"a☻☺b", '☺', 4},
+
+ // RuneError should match any invalid UTF-8 byte sequence.
+ {"�", '�', 0},
+ {"\xff", '�', 0},
+ {"☻x�", '�', len("☻x")},
+ {"☻x\xe2\x98", '�', len("☻x")},
+ {"☻x\xe2\x98�", '�', len("☻x")},
+ {"☻x\xe2\x98x", '�', len("☻x")},
+
+ // Invalid rune values should never match.
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
+ {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+ }
+ for _, tt := range tests {
+ if got := IndexRune(tt.in, tt.rune); got != tt.want {
+ t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
}
}
t.Fatalf("'世' at %d; want 4", i)
}
})
-
if allocs != 0 {
- t.Errorf(`expected no allocations, got %f`, allocs)
+ t.Errorf("expected no allocations, got %f", allocs)
}
}