<li><code>http/spdy</code></li>
</ul>
+<p>
+Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
+</p>
+
<p>
All these packages are available under the same names, with <code>exp/</code> prefixed: <code>exp/ebnf</code> etc.
</p>
<em>Updating</em>:
Code that uses packages in <code>exp</code> will need to be updated by hand,
or else compiled from an installation that has <code>exp</code> available.
-Gofix will warn about such uses.
+Gofix or the compiler will complain about such uses.
<br>
<font color="red">TODO: gofix should warn about such uses.</font>
</p>
<li><code>http/spdy</code></li>
</ul>
+<p>
+Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
+</p>
+
<p>
All these packages are available under the same names, with <code>exp/</code> prefixed: <code>exp/ebnf</code> etc.
</p>
<em>Updating</em>:
Code that uses packages in <code>exp</code> will need to be updated by hand,
or else compiled from an installation that has <code>exp</code> available.
-Gofix will warn about such uses.
+Gofix or the compiler will complain about such uses.
<br>
<font color="red">TODO: gofix should warn about such uses.</font>
</p>
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/darwin_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_386/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/darwin_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/darwin_amd64/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/freebsd_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_386/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/freebsd_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/freebsd_amd64/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/linux_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_386/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/linux_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_amd64/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+5g -o "$WORK"/unicode/utf8/_obj/_go_.5 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.5
mkdir -p "$GOROOT"/pkg/linux_arm/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/linux_arm/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/netbsd_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_386/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/netbsd_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/netbsd_amd64/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/openbsd_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_386/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/openbsd_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/openbsd_amd64/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/plan9_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/plan9_386/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+8g -o "$WORK"/unicode/utf8/_obj/_go_.8 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.8
mkdir -p "$GOROOT"/pkg/windows_386/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_386/unicode/utf8.a
mkdir -p "$WORK"/unicode/utf8/_obj/
cd "$GOROOT"/src/pkg/unicode/utf8
-6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./string.go ./utf8.go
+6g -o "$WORK"/unicode/utf8/_obj/_go_.6 -p unicode/utf8 -I "$WORK" ./utf8.go
gopack grc "$WORK"/unicode/utf8.a "$WORK"/unicode/utf8/_obj/_go_.6
mkdir -p "$GOROOT"/pkg/windows_amd64/unicode/
cp "$WORK"/unicode/utf8.a "$GOROOT"/pkg/windows_amd64/unicode/utf8.a
--- /dev/null
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=exp/utf8string
+GOFILES=\
+ string.go\
+
+include ../../../Make.pkg
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package utf8
+// Package utf8string provides an efficient way to index strings by rune rather than by byte.
+package utf8string
-import "errors"
+import (
+ "errors"
+ "unicode/utf8"
+)
// String wraps a regular string with a small structure that provides more
// efficient indexing by code point index, as opposed to byte index.
s.bytePos = 0
s.runePos = 0
for i := 0; i < len(contents); i++ {
- if contents[i] >= RuneSelf {
+ if contents[i] >= utf8.RuneSelf {
// Not ASCII.
- s.numRunes = RuneCountInString(contents)
- _, s.width = DecodeRuneInString(contents)
+ s.numRunes = utf8.RuneCountInString(contents)
+ _, s.width = utf8.DecodeRuneInString(contents)
s.nonASCII = i
return s
}
switch {
case i == s.runePos-1: // backing up one rune
- r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos])
+ r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos = i
s.bytePos -= s.width
return r
s.bytePos += s.width
fallthrough
case i == s.runePos:
- r, s.width = DecodeRuneInString(s.str[s.bytePos:])
+ r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
return r
case i == 0: // start of string
- r, s.width = DecodeRuneInString(s.str)
+ r, s.width = utf8.DecodeRuneInString(s.str)
s.runePos = 0
s.bytePos = 0
return r
case i == s.numRunes-1: // last rune in string
- r, s.width = DecodeLastRuneInString(s.str)
+ r, s.width = utf8.DecodeLastRuneInString(s.str)
s.runePos = i
s.bytePos = len(s.str) - s.width
return r
if forward {
// TODO: Is it much faster to use a range loop for this scan?
for {
- r, s.width = DecodeRuneInString(s.str[s.bytePos:])
+ r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:])
if s.runePos == i {
break
}
}
} else {
for {
- r, s.width = DecodeLastRuneInString(s.str[0:s.bytePos])
+ r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos])
s.runePos--
s.bytePos -= s.width
if s.runePos == i {
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package utf8_test
+package utf8string
import (
"math/rand"
"testing"
- . "unicode/utf8"
+ "unicode/utf8"
)
+var testStrings = []string{
+ "",
+ "abcd",
+ "☺☻☹",
+ "日a本b語ç日ð本Ê語þ日¥本¼語i日©",
+ "日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
+ "\x80\x80\x80\x80",
+}
+
func TestScanForwards(t *testing.T) {
for _, s := range testStrings {
runes := []rune(s)
if str.Slice(0, 0) != "" {
t.Error("failure with empty slice at beginning")
}
- nr := RuneCountInString(s)
+ nr := utf8.RuneCountInString(s)
if str.Slice(nr, nr) != "" {
t.Error("failure with empty slice at end")
}
TARG=unicode/utf8
GOFILES=\
- string.go\
utf8.go\
include ../../../Make.pkg
package main
// Test that error messages say what the source file says
-// (uint8 vs byte).
+// (uint8 vs byte, int32 vs. rune).
import (
"fmt"
ff.Format(fs, x) // ERROR "rune"
utf8.RuneStart(x) // ERROR "byte"
-
- var s utf8.String
- s.At(x) // ERROR "int"
}