From 2f80d328e87b8155239ed950f4edaf02f1527dea Mon Sep 17 00:00:00 2001
From: Rob Pike <r@golang.org>
Date: Sat, 25 Sep 2010 08:52:29 +1000
Subject: [PATCH] utf8.String: Slice(i,j)

R=rsc
CC=golang-dev
https://golang.org/cl/2225048
---
 src/pkg/utf8/string.go      | 38 +++++++++++++++++++++++++++++++++++-
 src/pkg/utf8/string_test.go | 39 +++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/src/pkg/utf8/string.go b/src/pkg/utf8/string.go
index ce74f720db..59107ac19d 100644
--- a/src/pkg/utf8/string.go
+++ b/src/pkg/utf8/string.go
@@ -11,6 +11,8 @@ package utf8
 // O(N) in the length of the string, but the overhead is less than always
 // scanning from the beginning.
 // If the string is ASCII, random access is O(1).
+// Unlike the built-in string type, String has internal mutable state and
+// is not thread-safe.
 type String struct {
 	str      string
 	numRunes int
@@ -55,6 +57,39 @@ func (s *String) IsASCII() bool {
 	return s.width == 0
 }
 
+// Slice returns the string sliced at rune positions [i:j].
+func (s *String) Slice(i, j int) string {
+	// ASCII is easy.  Let the compiler catch the indexing error if there is one.
+	if j < s.nonASCII {
+		return s.str[i:j]
+	}
+	if i < 0 || j > s.numRunes || i > j {
+		panic(sliceOutOfRange)
+	}
+	if i == j {
+		return ""
+	}
+	// For non-ASCII, after At(i), bytePos is always the position of the indexed character.
+	var low, high int
+	switch {
+	case i < s.nonASCII:
+		low = i
+	case i == s.numRunes:
+		low = len(s.str)
+	default:
+		s.At(i)
+		low = s.bytePos
+	}
+	switch {
+	case j == s.numRunes:
+		high = len(s.str)
+	default:
+		s.At(j)
+		high = s.bytePos
+	}
+	return s.str[low:high]
+}
+
 // At returns the rune with index i in the String.  The sequence of runes is the same
 // as iterating over the contents with a "for range" clause.
 func (s *String) At(i int) int {
@@ -163,4 +198,5 @@ func (err error) String() string {
 func (err error) RunTimeError() {
 }
 
-var outOfRange = error("utf8.String: index out of Range")
+var outOfRange = error("utf8.String: index out of range")
+var sliceOutOfRange = error("utf8.String: slice index out of range")
diff --git a/src/pkg/utf8/string_test.go b/src/pkg/utf8/string_test.go
index 7ce8f8a7ea..484d46fbff 100644
--- a/src/pkg/utf8/string_test.go
+++ b/src/pkg/utf8/string_test.go
@@ -68,3 +68,42 @@ func TestRandomAccess(t *testing.T) {
 		}
 	}
 }
+
+func TestRandomSliceAccess(t *testing.T) {
+	for _, s := range testStrings {
+		if len(s) == 0 || s[0] == '\x80' { // the bad-UTF-8 string fools this simple test
+			continue
+		}
+		runes := []int(s)
+		str := NewString(s)
+		if str.RuneCount() != len(runes) {
+			t.Error("%s: expected %d runes; got %d", s, len(runes), str.RuneCount())
+			break
+		}
+		for k := 0; k < randCount; k++ {
+			i := rand.Intn(len(runes))
+			j := rand.Intn(len(runes) + 1)
+			if i > j { // include empty strings
+				continue
+			}
+			expect := string(runes[i:j])
+			got := str.Slice(i, j)
+			if got != expect {
+				t.Errorf("%s[%d:%d]: expected %q got %q", s, i, j, expect, got)
+			}
+		}
+	}
+}
+
+func TestLimitSliceAccess(t *testing.T) {
+	for _, s := range testStrings {
+		str := NewString(s)
+		if str.Slice(0, 0) != "" {
+			t.Error("failure with empty slice at beginning")
+		}
+		nr := RuneCountInString(s)
+		if str.Slice(nr, nr) != "" {
+			t.Error("failure with empty slice at end")
+		}
+	}
+}
-- 
2.50.0