// All entities that do not end with ';' are 6 or fewer bytes long.
const longestEntityWithoutSemicolon = 6
+// entityMaps returns entity and entity2.
+//
// entity is a map from HTML entity names to their values. The semicolon matters:
// https://html.spec.whatwg.org/multipage/named-characters.html
// lists both "amp" and "amp;" as two separate entries.
-//
// Note that the HTML5 list is larger than the HTML4 list at
// http://www.w3.org/TR/html4/sgml/entities.html
-var entity map[string]rune
-
-// HTML entities that are two unicode codepoints.
-var entity2 map[string][2]rune
-
-// populateMapsOnce guards calling populateMaps.
-var populateMapsOnce sync.Once
-
-// populateMaps populates entity and entity2.
-func populateMaps() {
+//
+// entity2 is a map from HTML entity names to their values, for entities whose value is two Unicode code points.
+var entityMaps = sync.OnceValues(func() (entity map[string]rune, entity2 map[string][2]rune) {
entity = map[string]rune{
"AElig;": '\U000000C6',
"AMP;": '\U00000026',
"vsupnE;": {'\u2ACC', '\uFE00'},
"vsupne;": {'\u228B', '\uFE00'},
}
-}
+
+ return entity, entity2
+})
"unicode/utf8"
)
-func init() {
- UnescapeString("") // force load of entity maps
-}
-
func TestEntityLength(t *testing.T) {
+ entity, entity2 := entityMaps()
+
if len(entity) == 0 || len(entity2) == 0 {
t.Fatal("maps not loaded")
}
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
-func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) {
+func unescapeEntity(b []byte, dst, src int, entity map[string]rune, entity2 map[string][2]rune) (dst1, src1 int) {
const attribute = false
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func UnescapeString(s string) string {
- populateMapsOnce.Do(populateMaps)
i := strings.IndexByte(s, '&')
if i < 0 {
}
b := []byte(s)
- dst, src := unescapeEntity(b, i, i)
+ entity, entity2 := entityMaps()
+ dst, src := unescapeEntity(b, i, i, entity, entity2)
for len(s[src:]) > 0 {
if s[src] == '&' {
i = 0
if i > 0 {
copy(b[dst:], s[src:src+i])
}
- dst, src = unescapeEntity(b, dst+i, src+i)
+ dst, src = unescapeEntity(b, dst+i, src+i, entity, entity2)
}
return string(b[:dst])
}