From 90b76c0f3e3356e17c03baae3e20a4a11c2a6f10 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Wed, 2 Nov 2011 09:42:25 +1100 Subject: [PATCH] html: refactor the blacklist for the "render and re-parse" test. R=andybalholm CC=golang-dev, mikesamuel https://golang.org/cl/5331056 --- src/pkg/html/parse_test.go | 20 ++++++++++++++------ src/pkg/html/render.go | 31 +++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index caf3c92bc7..067eb26d04 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -160,14 +160,10 @@ func TestParser(t *testing.T) { t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", filename, i, text, got, want) continue } - // Check that rendering and re-parsing results in an identical tree. - if filename == "tests1.dat" && (i == 30 || i == 77) { - // Some tests in tests1.dat have such messed-up markup that a correct parse - // results in a non-conforming tree (one element nested inside another). - // Therefore when it is rendered and re-parsed, it isn't the same. - // So we skip rendering on that test. + if renderTestBlacklist[text] { continue } + // Check that rendering and re-parsing results in an identical tree. pr, pw := io.Pipe() go func() { pw.CloseWithError(Render(pw, doc)) @@ -187,3 +183,15 @@ func TestParser(t *testing.T) { } } } + +// Some test input result in parse trees are not 'well-formed' despite +// following the HTML5 recovery algorithms. Rendering and re-parsing such a +// tree will not result in an exact clone of that tree. We blacklist such +// inputs from the render test. +var renderTestBlacklist = map[string]bool{ + // The second will be reparented to the first 's parent. This + // results in an whose parent is an , which is not 'well-formed'. + `
XCY`: true, + // The second will be reparented, similar to the case above. + `ababrx
aoe`: true, +} diff --git a/src/pkg/html/render.go b/src/pkg/html/render.go index d5dc448433..0522b6ef92 100644 --- a/src/pkg/html/render.go +++ b/src/pkg/html/render.go @@ -19,17 +19,28 @@ type writer interface { // Render renders the parse tree n to the given writer. // -// For 'well-formed' parse trees, calling Parse on the output of Render will -// result in a clone of the original tree. +// Rendering is done on a 'best effort' basis: calling Parse on the output of +// Render will always result in something similar to the original tree, but it +// is not necessarily an exact clone unless the original tree was 'well-formed'. +// 'Well-formed' is not easily specified; the HTML5 specification is +// complicated. // -// 'Well-formed' is not formally specified, but calling Parse on arbitrary -// input results in a 'well-formed' parse tree if Parse does not return an -// error. Programmatically constructed trees are typically also 'well-formed', -// but it is possible to construct a tree that, when rendered and re-parsed, -// results in a different tree. A simple example is that a solitary text node -// would become a tree containing , and elements. Another -// example is that the programmatic equivalent of "abc" becomes -// "abc". +// Calling Parse on arbitrary input typically results in a 'well-formed' parse +// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree. +// For example, in a 'well-formed' parse tree, no element is a child of +// another element: parsing "" results in two sibling elements. +// Similarly, in a 'well-formed' parse tree, no element is a child of a +// element: parsing "

" results in a

with two sibling +// children; the is reparented to the

's parent. However, calling +// Parse on "
" does not return an error, but the result has an +// element with an child, and is therefore not 'well-formed'. +// +// Programmatically constructed trees are typically also 'well-formed', but it +// is possible to construct a tree that looks innocuous but, when rendered and +// re-parsed, results in a different tree. A simple example is that a solitary +// text node would become a tree containing , and elements. +// Another example is that the programmatic equivalent of "abc" +// becomes "abc". func Render(w io.Writer, n *Node) os.Error { if x, ok := w.(writer); ok { return render(x, n) -- 2.50.0