Change strings.Split, bytes.Split to take a maximum substring count argument.

author David Symonds <dsymonds@golang.org>

Thu, 25 Jun 2009 02:02:29 +0000 (19:02 -0700)

committer David Symonds <dsymonds@golang.org>

Thu, 25 Jun 2009 02:02:29 +0000 (19:02 -0700)
author David Symonds <dsymonds@golang.org>
Thu, 25 Jun 2009 02:02:29 +0000 (19:02 -0700)
committer David Symonds <dsymonds@golang.org>
Thu, 25 Jun 2009 02:02:29 +0000 (19:02 -0700)
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go

index 17f82db7ce9820a5c70d379fce450c19aad96b94..e5e8bffd8ccb3ebe9223864cbfae88731d7b088a 100644 (file)
--- a/src/pkg/bytes/bytes.go
+++ b/src/pkg/bytes/bytes.go
@@ -55,19 +55,27 @@ func Copy(dst, src []byte) int {
         return len(src)
  }
  
-// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes).
-// Invalid UTF-8 sequences become correct encodings of U+FFF8.
-func Explode(s []byte) [][]byte {
-       a := make([][]byte, utf8.RuneCount(s));
+// explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes),
+// up to a maximum of n byte arrays (n <= 0 means no limit). Invalid UTF-8 sequences are chopped into individual bytes.
+func explode(s []byte, n int) [][]byte {
+       if n <= 0 {
+               n = len(s);
+       }
+       a := make([][]byte, n);
         var size, rune int;
-       i := 0;
+       na := 0;
         for len(s) > 0 {
+               if na+1 >= n {
+                       a[na] = s;
+                       na++;
+                       break
+               }
                 rune, size = utf8.DecodeRune(s);
-               a[i] = s[0:size];
+               a[na] = s[0:size];
                 s = s[size:len(s)];
-               i++;
+               na++;
         }
-       return a
+       return a[0:na]
  }
  
  // Count counts the number of non-overlapping instances of sep in s.
@@ -101,27 +109,30 @@ func Index(s, sep []byte) int {
         return -1
  }
  
-// Split returns the array representing the subarrays of s separated by sep. Adjacent
-// occurrences of sep produce empty subarrays.  If sep is empty, it is the same as Explode.
-func Split(s, sep []byte) [][]byte {
+// Split splits the array s around each instance of sep, returning an array of subarrays of s.
+// If sep is empty, Split splits s after each UTF-8 sequence.
+// If n > 0, Split splits s into at most n subarrays; the last subarray will contain an unsplit remainder.
+func Split(s, sep []byte, n int) [][]byte {
         if len(sep) == 0 {
-               return Explode(s)
+               return explode(s, n)
+       }
+       if n <= 0 {
+               n = Count(s, sep) + 1;
         }
         c := sep[0];
         start := 0;
-       n := Count(s, sep)+1;
         a := make([][]byte, n);
         na := 0;
-       for i := 0; i+len(sep) <= len(s); i++ {
+       for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
                 if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
                         a[na] = s[start:i];
                         na++;
                         start = i+len(sep);
-                       i += len(sep)-1
+                       i += len(sep)-1;
                 }
         }
         a[na] = s[start:len(s)];
-       return a
+       return a[0:na+1]
  }
  
  // Join concatenates the elements of a to create a single byte array.   The separator
diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go

index 3fbe21c30dcc3893bbc6c87b0cc820dded0828ae..01adbccfd8bdde34f8e2bc1ccd3bc2292c0cedd8 100644 (file)
--- a/src/pkg/bytes/bytes_test.go
+++ b/src/pkg/bytes/bytes_test.go
@@ -75,24 +75,25 @@ func TestCompare(t *testing.T) {
  
  type ExplodeTest struct {
         s string;
+       n int;
         a []string;
  }
  var explodetests = []ExplodeTest {
-       ExplodeTest{ abcd,      []string{"a", "b", "c", "d"} },
-       ExplodeTest{ faces,     []string{"☺", "☻", "☹" } },
+       ExplodeTest{ abcd,      0, []string{"a", "b", "c", "d"} },
+       ExplodeTest{ faces,     0, []string{"☺", "☻", "☹"} },
+       ExplodeTest{ abcd,      2, []string{"a", "bcd"} },
  }
  func TestExplode(t *testing.T) {
-       for i := 0; i < len(explodetests); i++ {
-               tt := explodetests[i];
-               a := Explode(io.StringBytes(tt.s));
+       for _, tt := range(explodetests) {
+               a := explode(io.StringBytes(tt.s), tt.n);
                 result := arrayOfString(a);
                 if !eq(result, tt.a) {
-                       t.Errorf(`Explode("%s") = %v; want %v`, tt.s, result, tt.a);
+                       t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a);
                         continue;
                 }
                 s := Join(a, []byte{});
                 if string(s) != tt.s {
-                       t.Errorf(`Join(Explode("%s"), "") = "%s"`, tt.s, s);
+                       t.Errorf(`Join(Explode("%s", %d), "") = "%s"`, tt.s, tt.n, s);
                 }
         }
  }
@@ -101,30 +102,35 @@ func TestExplode(t *testing.T) {
  type SplitTest struct {
         s string;
         sep string;
+       n int;
         a []string;
  }
  var splittests = []SplitTest {
-       SplitTest{ abcd,        "a",    []string{"", "bcd"} },
-       SplitTest{ abcd,        "z",    []string{"abcd"} },
-       SplitTest{ abcd,        "",     []string{"a", "b", "c", "d"} },
-       SplitTest{ commas,      ",",    []string{"1", "2", "3", "4"} },
-       SplitTest{ dots,        "...",  []string{"1", ".2", ".3", ".4"} },
-       SplitTest{ faces,       "☹",  []string{"☺☻", ""} },
-       SplitTest{ faces,       "~",    []string{faces} },
-       SplitTest{ faces,       "",     []string{"☺", "☻", "☹"} },
+       SplitTest{ abcd,        "a",    0, []string{"", "bcd"} },
+       SplitTest{ abcd,        "z",    0, []string{"abcd"} },
+       SplitTest{ abcd,        "",     0, []string{"a", "b", "c", "d"} },
+       SplitTest{ commas,      ",",    0, []string{"1", "2", "3", "4"} },
+       SplitTest{ dots,        "...",  0, []string{"1", ".2", ".3", ".4"} },
+       SplitTest{ faces,       "☹",  0, []string{"☺☻", ""} },
+       SplitTest{ faces,       "~",    0, []string{faces} },
+       SplitTest{ faces,       "",     0, []string{"☺", "☻", "☹"} },
+       SplitTest{ "1 2 3 4",   " ",    3, []string{"1", "2", "3 4"} },
+       SplitTest{ "1 2 3",     " ",    3, []string{"1", "2", "3"} },
+       SplitTest{ "1 2",       " ",    3, []string{"1", "2"} },
+       SplitTest{ "123",       "",     2, []string{"1", "23"} },
+       SplitTest{ "123",       "",     17, []string{"1", "2", "3"} },
  }
  func TestSplit(t *testing.T) {
-       for i := 0; i < len(splittests); i++ {
-               tt := splittests[i];
-               a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep));
+       for _, tt := range splittests {
+               a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep), tt.n);
                 result := arrayOfString(a);
                 if !eq(result, tt.a) {
-                       t.Errorf(`Split("%s", "%s") = %v; want %v`, tt.s, tt.sep, result, tt.a);
+                       t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a);
                         continue;
                 }
                 s := Join(a, io.StringBytes(tt.sep));
                 if string(s) != tt.s {
-                       t.Errorf(`Join(Split("%s", "%s"), "%s") = "%s"`, tt.s, tt.sep, tt.sep, s);
+                       t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s);
                 }
         }
  }
diff --git a/src/pkg/exec/exec.go b/src/pkg/exec/exec.go

index c2b7bdd59bf208afcf1aaf7833fd5128461634af..ebb40a2fe83926f4f6c10b73851ede9516c2354f 100644 (file)
--- a/src/pkg/exec/exec.go
+++ b/src/pkg/exec/exec.go
@@ -214,7 +214,7 @@ func LookPath(file string) (string, os.Error) {
                 // (equivalent to PATH=".").
                 pathenv = "";
         }
-       for i, dir := range strings.Split(pathenv, ":") {
+       for i, dir := range strings.Split(pathenv, ":", 0) {
                 if dir == "" {
                         // Unix shell semantics: path element "" means "."
                         dir = ".";
diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go

index 19a65a2277c2e5c857c737794e881c611d6947cb..064080fe7658d2de844c450fc6991a8497220f96 100644 (file)
--- a/src/pkg/go/doc/comment.go
+++ b/src/pkg/go/doc/comment.go
@@ -45,7 +45,7 @@ func commentText(comments []string) string {
         lines := make([]string, 0, 20);
         for i, c := range comments {
                 // split on newlines
-               cl := strings.Split(c, "\n");
+               cl := strings.Split(c, "\n", 0);
  
                 // walk lines, stripping comment markers
                 w := 0;
diff --git a/src/pkg/http/client.go b/src/pkg/http/client.go

index 52a536fb388d360e95aa75a9215ddb90749e8fb9..8c17eb8e36cb1bf4a2eef838bd7abed31effc11a 100644 (file)
--- a/src/pkg/http/client.go
+++ b/src/pkg/http/client.go
@@ -108,13 +108,12 @@ func send(req *Request) (resp *Response, err os.Error) {
         if err != nil {
                 return nil, err;
         }
-       i := strings.Index(line, " ");
-       j := strings.Index(line[i+1:len(line)], " ") + i+1;
-       if i < 0 || j < 0 {
+       f := strings.Split(line, " ", 3);
+       if len(f) < 3 {
                 return nil, os.ErrorString(fmt.Sprintf("Invalid first line in HTTP response: %q", line));
         }
-       resp.Status = line[i+1:len(line)];
-       resp.StatusCode, err = strconv.Atoi(line[i+1:j]);
+       resp.Status = f[1] + " " + f[2];
+       resp.StatusCode, err = strconv.Atoi(f[1]);
         if err != nil {
                 return nil, os.ErrorString(fmt.Sprintf("Invalid status code in HTTP response: %q", line));
         }
diff --git a/src/pkg/http/request.go b/src/pkg/http/request.go

index b331eb08372a58b3a5ec615c584e938bba1392ac..9051d4c43dd069dedf1c6c35e2b6fe214fa1c607 100644 (file)
--- a/src/pkg/http/request.go
+++ b/src/pkg/http/request.go
@@ -442,7 +442,7 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
         }
  
         var f []string;
-       if f = strings.Split(s, " "); len(f) != 3 {
+       if f = strings.Split(s, " ", 3); len(f) < 3 {
                 return nil, BadRequest
         }
         req.Method, req.RawUrl, req.Proto = f[0], f[1], f[2];
@@ -572,8 +572,8 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
  
  func parseForm(body string) (data map[string] *vector.StringVector, err os.Error) {
         data = make(map[string] *vector.StringVector);
-       for _, kv := range strings.Split(body, "&") {
-               kvPair := strings.Split(kv, "=");
+       for _, kv := range strings.Split(body, "&", 0) {
+               kvPair := strings.Split(kv, "=", 2);
  
                 var key, value string;
                 var e os.Error;
diff --git a/src/pkg/strconv/fp_test.go b/src/pkg/strconv/fp_test.go

index f1993bb7e2f3db1778b845ab020221fd0de44c46..20e158cec90eec016d35b04ba69dc2e35ae578a1 100644 (file)
--- a/src/pkg/strconv/fp_test.go
+++ b/src/pkg/strconv/fp_test.go
@@ -28,7 +28,7 @@ func pow2(i int) float64 {
  // Wrapper around strconv.Atof64.  Handles dddddp+ddd (binary exponent)
  // itself, passes the rest on to strconv.Atof64.
  func myatof64(s string) (f float64, ok bool) {
-       a := strings.Split(s, "p");
+       a := strings.Split(s, "p", 2);
         if len(a) == 2 {
                 n, err := strconv.Atoi64(a[0]);
                 if err != nil {
@@ -72,7 +72,7 @@ func myatof64(s string) (f float64, ok bool) {
  // Wrapper around strconv.Atof32.  Handles dddddp+ddd (binary exponent)
  // itself, passes the rest on to strconv.Atof32.
  func myatof32(s string) (f float32, ok bool) {
-       a := strings.Split(s, "p");
+       a := strings.Split(s, "p", 2);
         if len(a) == 2 {
                 n, err := strconv.Atoi(a[0]);
                 if err != nil {
@@ -115,7 +115,7 @@ func TestFp(t *testing.T) {
                 if len(line) == 0 || line[0] == '#' {
                         continue
                 }
-               a := strings.Split(line, " ");
+               a := strings.Split(line, " ", 0);
                 if len(a) != 4 {
                         t.Error("testfp.txt:", lineno, ": wrong field count\n");
                         continue;
diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go

index 03509077713dfa7dd4cdfca636a433f95aa09bdd..9b0f031b9afd99f4073a26bbe83f79b37c7adf0a 100644 (file)
--- a/src/pkg/strings/strings.go
+++ b/src/pkg/strings/strings.go
@@ -7,19 +7,27 @@ package strings
  
  import "utf8"
  
-// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings).
+// explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n <= 0 means no limit).
  // Invalid UTF-8 sequences become correct encodings of U+FFF8.
-func Explode(s string) []string {
-       a := make([]string, utf8.RuneCountInString(s));
+func explode(s string, n int) []string {
+       if n <= 0 {
+               n = len(s);
+       }
+       a := make([]string, n);
         var size, rune int;
-       i := 0;
+       na := 0;
         for len(s) > 0 {
+               if na+1 >= n {
+                       a[na] = s;
+                       na++;
+                       break
+               }
                 rune, size = utf8.DecodeRuneInString(s);
                 s = s[size:len(s)];
-               a[i] = string(rune);
-               i++;
+               a[na] = string(rune);
+               na++;
         }
-       return a
+       return a[0:na]
  }
  
  // Count counts the number of non-overlapping instances of sep in s.
@@ -68,27 +76,30 @@ func LastIndex(s, sep string) int {
         return -1
  }
  
-// Split returns the array representing the substrings of s separated by string sep. Adjacent
-// occurrences of sep produce empty substrings.  If sep is empty, it is the same as Explode.
-func Split(s, sep string) []string {
+// Split splits the string s around each instance of sep, returning an array of substrings of s.
+// If sep is empty, Split splits s after each UTF-8 sequence.
+// If n > 0, Split splits s into at most n substrings; the last substring will contain the unsplit remainder.
+func Split(s, sep string, n int) []string {
         if sep == "" {
-               return Explode(s)
+               return explode(s, n)
+       }
+       if n <= 0 {
+               n = Count(s, sep) + 1;
         }
         c := sep[0];
         start := 0;
-       n := Count(s, sep)+1;
         a := make([]string, n);
         na := 0;
-       for i := 0; i+len(sep) <= len(s); i++ {
+       for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
                 if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
                         a[na] = s[start:i];
                         na++;
                         start = i+len(sep);
-                       i += len(sep)-1
+                       i += len(sep)-1;
                 }
         }
         a[na] = s[start:len(s)];
-       return a
+       return a[0:na+1]
  }
  
  // Join concatenates the elements of a to create a single string.   The separator string
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go

index 6464ca3992df2faa97dc85fa184fbc72d1f81b91..7a41584b7003974385a9837643306e404daf4c04 100644 (file)
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@@ -83,23 +83,24 @@ func TestLastIndex(t *testing.T) {
  
  type ExplodeTest struct {
         s string;
+       n int;
         a []string;
  }
  var explodetests = []ExplodeTest {
-       ExplodeTest{ abcd,      []string{"a", "b", "c", "d"} },
-       ExplodeTest{ faces,     []string{"☺", "☻", "☹" } },
+       ExplodeTest{ abcd,      4, []string{"a", "b", "c", "d"} },
+       ExplodeTest{ faces,     3, []string{"☺", "☻", "☹"} },
+       ExplodeTest{ abcd,      2, []string{"a", "bcd"} },
  }
  func TestExplode(t *testing.T) {
-       for i := 0; i < len(explodetests); i++ {
-               tt := explodetests[i];
-               a := Explode(tt.s);
+       for _, tt := range explodetests {
+               a := explode(tt.s, tt.n);
                 if !eq(a, tt.a) {
-                       t.Errorf("Explode(%q) = %v; want %v", tt.s, a, tt.a);
+                       t.Errorf("explode(%q, %d) = %v; want %v", tt.s, tt.n, a, tt.a);
                         continue;
                 }
                 s := Join(a, "");
                 if s != tt.s {
-                       t.Errorf(`Join(Explode(%q), "") = %q`, tt.s, s);
+                       t.Errorf(`Join(explode(%q, %d), "") = %q`, tt.s, tt.n, s);
                 }
         }
  }
@@ -107,29 +108,33 @@ func TestExplode(t *testing.T) {
  type SplitTest struct {
         s string;
         sep string;
+       n int;
         a []string;
  }
  var splittests = []SplitTest {
-       SplitTest{ abcd,        "a",    []string{"", "bcd"} },
-       SplitTest{ abcd,        "z",    []string{"abcd"} },
-       SplitTest{ abcd,        "",     []string{"a", "b", "c", "d"} },
-       SplitTest{ commas,      ",",    []string{"1", "2", "3", "4"} },
-       SplitTest{ dots,        "...",  []string{"1", ".2", ".3", ".4"} },
-       SplitTest{ faces,       "☹",  []string{"☺☻", ""} },
-       SplitTest{ faces,       "~",    []string{faces} },
-       SplitTest{ faces,       "",     []string{"☺", "☻", "☹"} },
+       SplitTest{ abcd,        "a",    0, []string{"", "bcd"} },
+       SplitTest{ abcd,        "z",    0, []string{"abcd"} },
+       SplitTest{ abcd,        "",     0, []string{"a", "b", "c", "d"} },
+       SplitTest{ commas,      ",",    0, []string{"1", "2", "3", "4"} },
+       SplitTest{ dots,        "...",  0, []string{"1", ".2", ".3", ".4"} },
+       SplitTest{ faces,       "☹",  0, []string{"☺☻", ""} },
+       SplitTest{ faces,       "~",    0, []string{faces} },
+       SplitTest{ faces,       "",     0, []string{"☺", "☻", "☹"} },
+       SplitTest{ "1 2 3 4",   " ",    3, []string{"1", "2", "3 4"} },
+       SplitTest{ "1 2",       " ",    3, []string{"1", "2"} },
+       SplitTest{ "123",       "",     2, []string{"1", "23"} },
+       SplitTest{ "123",       "",     17, []string{"1", "2", "3"} },
  }
  func TestSplit(t *testing.T) {
-       for i := 0; i < len(splittests); i++ {
-               tt := splittests[i];
-               a := Split(tt.s, tt.sep);
+       for _, tt := range splittests {
+               a := Split(tt.s, tt.sep, tt.n);
                 if !eq(a, tt.a) {
-                       t.Errorf("Split(%q, %q) = %v; want %v", tt.s, tt.sep, a, tt.a);
+                       t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a);
                         continue;
                 }
                 s := Join(a, tt.sep);
                 if s != tt.s {
-                       t.Errorf("Join(Split(%q, %q), %q) = %q", tt.s, tt.sep, tt.sep, s);
+                       t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s);
                 }
         }
  }
author	David Symonds <dsymonds@golang.org>
	Thu, 25 Jun 2009 02:02:29 +0000 (19:02 -0700)
committer	David Symonds <dsymonds@golang.org>
	Thu, 25 Jun 2009 02:02:29 +0000 (19:02 -0700)
src/pkg/bytes/bytes.go		patch \| blob \| history
src/pkg/bytes/bytes_test.go		patch \| blob \| history
src/pkg/exec/exec.go		patch \| blob \| history
src/pkg/go/doc/comment.go		patch \| blob \| history
src/pkg/http/client.go		patch \| blob \| history
src/pkg/http/request.go		patch \| blob \| history
src/pkg/strconv/fp_test.go		patch \| blob \| history
src/pkg/strings/strings.go		patch \| blob \| history
src/pkg/strings/strings_test.go		patch \| blob \| history