Don't unescape entities in attributes when they don't end with
a semicolon and they are followed by '=', a letter, or a digit.
Pass 6 more tests from the WebKit test suite, plus one that was
commented out in token_test.go.
R=nigeltao
CC=golang-dev
https://golang.org/cl/6405073
}
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
-func unescape(b []byte) []byte {
+// attribute should be true if parsing an attribute value.
+func unescape(b []byte, attribute bool) []byte {
for i, c := range b {
if c == '&' {
- dst, src := unescapeEntity(b, i, i, false)
+ dst, src := unescapeEntity(b, i, i, attribute)
for src < len(b) {
c := b[src]
if c == '&' {
- dst, src = unescapeEntity(b, dst, src, false)
+ dst, src = unescapeEntity(b, dst, src, attribute)
} else {
b[dst] = c
dst, src = dst+1, src+1
func UnescapeString(s string) string {
for _, c := range s {
if c == '&' {
- return string(unescape([]byte(s)))
+ return string(unescape([]byte(s), false))
}
}
return s
PASS "<div bar=\"ZZ&\"></div>"
PASS "<div bar='ZZ&'></div>"
PASS "<div bar=ZZ&></div>"
-FAIL "<div bar=\"ZZ&gt=YY\"></div>"
-FAIL "<div bar=\"ZZ&gt0YY\"></div>"
-FAIL "<div bar=\"ZZ&gt9YY\"></div>"
-FAIL "<div bar=\"ZZ&gtaYY\"></div>"
-FAIL "<div bar=\"ZZ&gtZYY\"></div>"
+PASS "<div bar=\"ZZ&gt=YY\"></div>"
+PASS "<div bar=\"ZZ&gt0YY\"></div>"
+PASS "<div bar=\"ZZ&gt9YY\"></div>"
+PASS "<div bar=\"ZZ&gtaYY\"></div>"
+PASS "<div bar=\"ZZ&gtZYY\"></div>"
PASS "<div bar=\"ZZ> YY\"></div>"
PASS "<div bar=\"ZZ>\"></div>"
PASS "<div bar='ZZ>'></div>"
PASS "<div bar=\"ZZ&prod_id=23\"></div>"
PASS "<div bar=\"ZZ£_id=23\"></div>"
PASS "<div bar=\"ZZ∏_id=23\"></div>"
-FAIL "<div bar=\"ZZ&pound=23\"></div>"
+PASS "<div bar=\"ZZ&pound=23\"></div>"
PASS "<div bar=\"ZZ&prod=23\"></div>"
PASS "<div>ZZ£_id=23</div>"
PASS "<div>ZZ&prod_id=23</div>"
z.data.end = z.raw.end
s = convertNewlines(s)
if !z.textIsRaw {
- s = unescape(s)
+ s = unescape(s, false)
}
return s
}
z.nAttrReturned++
key = z.buf[x[0].start:x[0].end]
val = z.buf[x[1].start:x[1].end]
- return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr)
+ return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
}
}
return nil, nil, false
`<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,
`<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`,
},
- /*
- // TODO: re-enable this test when it works. This input/output matches html5lib's behavior.
- {
- "entity without semicolon",
- `&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
- `¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
- },
- */
+ {
+ "entity without semicolon",
+ `&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
+ `¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
+ },
{
"entity with digits",
"½",