From f979528ce6057e3e16307a8ce11760bfb1785c29 Mon Sep 17 00:00:00 2001 From: Andrew Balholm Date: Mon, 23 Jul 2012 12:39:58 +1000 Subject: [PATCH] exp/html: special handling for entities in attributes Don't unescape entities in attributes when they don't end with a semicolon and they are followed by '=', a letter, or a digit. Pass 6 more tests from the WebKit test suite, plus one that was commented out in token_test.go. R=nigeltao CC=golang-dev https://golang.org/cl/6405073 --- src/pkg/exp/html/escape.go | 9 +++++---- src/pkg/exp/html/testlogs/entities02.dat.log | 12 ++++++------ src/pkg/exp/html/token.go | 4 ++-- src/pkg/exp/html/token_test.go | 13 +++++-------- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/pkg/exp/html/escape.go b/src/pkg/exp/html/escape.go index 6a9d8f6e6f..7827dc2d50 100644 --- a/src/pkg/exp/html/escape.go +++ b/src/pkg/exp/html/escape.go @@ -163,14 +163,15 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { } // unescape unescapes b's entities in-place, so that "a<b" becomes "a" PASS "
" PASS "
" PASS "
" -FAIL "
" -FAIL "
" -FAIL "
" -FAIL "
" -FAIL "
" +PASS "
" +PASS "
" +PASS "
" +PASS "
" +PASS "
" PASS "
" PASS "
" PASS "
" @@ -15,7 +15,7 @@ PASS "
" PASS "
" PASS "
" PASS "
" -FAIL "
" +PASS "
" PASS "
" PASS "
ZZ£_id=23
" PASS "
ZZ&prod_id=23
" diff --git a/src/pkg/exp/html/token.go b/src/pkg/exp/html/token.go index 4ca0a90b34..b20de87bee 100644 --- a/src/pkg/exp/html/token.go +++ b/src/pkg/exp/html/token.go @@ -741,7 +741,7 @@ func (z *Tokenizer) Text() []byte { z.data.end = z.raw.end s = convertNewlines(s) if !z.textIsRaw { - s = unescape(s) + s = unescape(s, false) } return s } @@ -775,7 +775,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) { z.nAttrReturned++ key = z.buf[x[0].start:x[0].end] val = z.buf[x[1].start:x[1].end] - return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr) + return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr) } } return nil, nil, false diff --git a/src/pkg/exp/html/token_test.go b/src/pkg/exp/html/token_test.go index a802bf3f2c..5209d139bb 100644 --- a/src/pkg/exp/html/token_test.go +++ b/src/pkg/exp/html/token_test.go @@ -370,14 +370,11 @@ var tokenTests = []tokenTest{ `<&alsoDoesntExist;&`, `$<&alsoDoesntExist;&`, }, - /* - // TODO: re-enable this test when it works. This input/output matches html5lib's behavior. - { - "entity without semicolon", - `¬it;∉`, - `¬it;∉$`, - }, - */ + { + "entity without semicolon", + `¬it;∉`, + `¬it;∉$`, + }, { "entity with digits", "½", -- 2.48.1