diff options
author | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-10-25 08:02:28 +0000 |
---|---|---|
committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-10-25 08:02:28 +0000 |
commit | f9a64dbd998f7761e6a06fc71052346d7f76c7f4 (patch) | |
tree | 3608e9a4fa99bbcc7d88dda34b1619a4ac4b122b /libgo/go/exp/html/parse_test.go | |
parent | 29a742dc2ec93b766a342fa6fb65da055c5417fc (diff) | |
download | gcc-f9a64dbd998f7761e6a06fc71052346d7f76c7f4.tar.gz |
2012-10-25 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 192797 using svnmerge.py
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@192798 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgo/go/exp/html/parse_test.go')
-rw-r--r-- | libgo/go/exp/html/parse_test.go | 282 |
1 files changed, 201 insertions, 81 deletions
diff --git a/libgo/go/exp/html/parse_test.go b/libgo/go/exp/html/parse_test.go index f3f966cf58e..7cf2ff4163e 100644 --- a/libgo/go/exp/html/parse_test.go +++ b/libgo/go/exp/html/parse_test.go @@ -8,9 +8,14 @@ import ( "bufio" "bytes" "errors" + "exp/html/atom" "fmt" "io" + "io/ioutil" "os" + "path/filepath" + "runtime" + "sort" "strings" "testing" ) @@ -37,7 +42,10 @@ func readParseTest(r *bufio.Reader) (text, want, context string, err error) { } b = append(b, line...) } - text = strings.TrimRight(string(b), "\n") + text = string(b) + if strings.HasSuffix(text, "\n") { + text = text[:len(text)-1] + } b = b[:0] // Skip the error list. @@ -70,12 +78,22 @@ func readParseTest(r *bufio.Reader) (text, want, context string, err error) { if string(line) != "#document\n" { return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line) } + inQuote := false for { line, err = r.ReadSlice('\n') if err != nil && err != io.EOF { return "", "", "", err } - if len(line) == 0 || len(line) == 1 && line[0] == '\n' { + trimmed := bytes.Trim(line, "| \n") + if len(trimmed) > 0 { + if line[0] == '|' && trimmed[0] == '"' { + inQuote = true + } + if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) { + inQuote = false + } + } + if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote { break } b = append(b, line...) @@ -90,6 +108,23 @@ func dumpIndent(w io.Writer, level int) { } } +type sortedAttributes []Attribute + +func (a sortedAttributes) Len() int { + return len(a) +} + +func (a sortedAttributes) Less(i, j int) bool { + if a[i].Namespace != a[j].Namespace { + return a[i].Namespace < a[j].Namespace + } + return a[i].Key < a[j].Key +} + +func (a sortedAttributes) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + func dumpLevel(w io.Writer, n *Node, level int) error { dumpIndent(w, level) switch n.Type { @@ -103,13 +138,8 @@ func dumpLevel(w io.Writer, n *Node, level int) error { } else { fmt.Fprintf(w, "<%s>", n.Data) } - attr := n.Attr - if len(attr) == 2 && attr[0].Namespace == "xml" && attr[1].Namespace == "xlink" { - // Some of the test cases in tests10.dat change the order of adjusted - // foreign attributes, but that behavior is not in the spec, and could - // simply be an implementation detail of html5lib's python map ordering. - attr[0], attr[1] = attr[1], attr[0] - } + attr := sortedAttributes(n.Attr) + sort.Sort(attr) for _, a := range attr { io.WriteString(w, "\n") dumpIndent(w, level+1) @@ -147,7 +177,7 @@ func dumpLevel(w io.Writer, n *Node, level int) error { return errors.New("unknown node type") } io.WriteString(w, "\n") - for _, c := range n.Child { + for c := n.FirstChild; c != nil; c = c.NextSibling { if err := dumpLevel(w, c, level+1); err != nil { return err } @@ -156,106 +186,126 @@ func dumpLevel(w io.Writer, n *Node, level int) error { } func dump(n *Node) (string, error) { - if n == nil || len(n.Child) == 0 { + if n == nil || n.FirstChild == nil { return "", nil } var b bytes.Buffer - for _, child := range n.Child { - if err := dumpLevel(&b, child, 0); err != nil { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if err := dumpLevel(&b, c, 0); err != nil { return "", err } } return b.String(), nil } +const testDataDir = "testdata/webkit/" + func TestParser(t *testing.T) { - testFiles := []struct { - filename string - // n is the number of test cases to run from that file. - // -1 means all test cases. - n int - }{ - // TODO(nigeltao): Process all the test cases from all the .dat files. - {"adoption01.dat", -1}, - {"doctype01.dat", -1}, - {"tests1.dat", -1}, - {"tests2.dat", -1}, - {"tests3.dat", -1}, - {"tests4.dat", -1}, - {"tests5.dat", -1}, - {"tests6.dat", -1}, - {"tests10.dat", 35}, + testFiles, err := filepath.Glob(testDataDir + "*.dat") + if err != nil { + t.Fatal(err) } for _, tf := range testFiles { - f, err := os.Open("testdata/webkit/" + tf.filename) + f, err := os.Open(tf) if err != nil { t.Fatal(err) } defer f.Close() r := bufio.NewReader(f) - for i := 0; i != tf.n; i++ { + + for i := 0; ; i++ { text, want, context, err := readParseTest(r) - if err == io.EOF && tf.n == -1 { + if err == io.EOF { break } if err != nil { t.Fatal(err) } - var doc *Node - if context == "" { - doc, err = Parse(strings.NewReader(text)) - if err != nil { - t.Fatal(err) - } - } else { - contextNode := &Node{ - Type: ElementNode, - Data: context, - } - nodes, err := ParseFragment(strings.NewReader(text), contextNode) - if err != nil { - t.Fatal(err) - } - doc = &Node{ - Type: DocumentNode, - } - for _, n := range nodes { - doc.Add(n) - } - } + err = testParseCase(text, want, context) - got, err := dump(doc) - if err != nil { - t.Fatal(err) - } - // Compare the parsed tree to the #document section. - if got != want { - t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", tf.filename, i, text, got, want) - continue - } - if renderTestBlacklist[text] || context != "" { - continue - } - // Check that rendering and re-parsing results in an identical tree. - pr, pw := io.Pipe() - go func() { - pw.CloseWithError(Render(pw, doc)) - }() - doc1, err := Parse(pr) if err != nil { - t.Fatal(err) - } - got1, err := dump(doc1) - if err != nil { - t.Fatal(err) + t.Errorf("%s test #%d %q, %s", tf, i, text, err) } - if got != got1 { - t.Errorf("%s test #%d %q, got vs got1:\n----\n%s----\n%s----", tf.filename, i, text, got, got1) - continue + } + } +} + +// testParseCase tests one test case from the test files. If the test does not +// pass, it returns an error that explains the failure. +// text is the HTML to be parsed, want is a dump of the correct parse tree, +// and context is the name of the context node, if any. +func testParseCase(text, want, context string) (err error) { + defer func() { + if x := recover(); x != nil { + switch e := x.(type) { + case error: + err = e + default: + err = fmt.Errorf("%v", e) } } + }() + + var doc *Node + if context == "" { + doc, err = Parse(strings.NewReader(text)) + if err != nil { + return err + } + } else { + contextNode := &Node{ + Type: ElementNode, + DataAtom: atom.Lookup([]byte(context)), + Data: context, + } + nodes, err := ParseFragment(strings.NewReader(text), contextNode) + if err != nil { + return err + } + doc = &Node{ + Type: DocumentNode, + } + for _, n := range nodes { + doc.AppendChild(n) + } + } + + if err := checkTreeConsistency(doc); err != nil { + return err + } + + got, err := dump(doc) + if err != nil { + return err + } + // Compare the parsed tree to the #document section. + if got != want { + return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want) + } + + if renderTestBlacklist[text] || context != "" { + return nil } + + // Check that rendering and re-parsing results in an identical tree. + pr, pw := io.Pipe() + go func() { + pw.CloseWithError(Render(pw, doc)) + }() + doc1, err := Parse(pr) + if err != nil { + return err + } + got1, err := dump(doc1) + if err != nil { + return err + } + if got != got1 { + return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1) + } + + return nil } // Some test input result in parse trees are not 'well-formed' despite @@ -266,11 +316,81 @@ var renderTestBlacklist = map[string]bool{ // The second <a> will be reparented to the first <table>'s parent. This // results in an <a> whose parent is an <a>, which is not 'well-formed'. `<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true, + // The same thing with a <p>: + `<p><table></p>`: true, // More cases of <a> being reparented: `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true, `<a><table><a></table><p><a><div><a>`: true, `<a><table><td><a><table></table><a></tr><a></table><a>`: true, + // A similar reparenting situation involving <nobr>: + `<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true, // A <plaintext> element is reparented, putting it before a table. // A <plaintext> element can't have anything after it in HTML. - `<table><plaintext><td>`: true, + `<table><plaintext><td>`: true, + `<!doctype html><table><plaintext></plaintext>`: true, + `<!doctype html><table><tbody><plaintext></plaintext>`: true, + `<!doctype html><table><tbody><tr><plaintext></plaintext>`: true, + // A form inside a table inside a form doesn't work either. + `<!doctype html><form><table></form><form></table></form>`: true, + // A script that ends at EOF may escape its own closing tag when rendered. + `<!doctype html><script><!--<script `: true, + `<!doctype html><script><!--<script <`: true, + `<!doctype html><script><!--<script <a`: true, + `<!doctype html><script><!--<script </`: true, + `<!doctype html><script><!--<script </s`: true, + `<!doctype html><script><!--<script </script`: true, + `<!doctype html><script><!--<script </scripta`: true, + `<!doctype html><script><!--<script -`: true, + `<!doctype html><script><!--<script -a`: true, + `<!doctype html><script><!--<script -<`: true, + `<!doctype html><script><!--<script --`: true, + `<!doctype html><script><!--<script --a`: true, + `<!doctype html><script><!--<script --<`: true, + `<script><!--<script `: true, + `<script><!--<script <a`: true, + `<script><!--<script </script`: true, + `<script><!--<script </scripta`: true, + `<script><!--<script -`: true, + `<script><!--<script -a`: true, + `<script><!--<script --`: true, + `<script><!--<script --a`: true, + `<script><!--<script <`: true, + `<script><!--<script </`: true, + `<script><!--<script </s`: true, + // Reconstructing the active formatting elements results in a <plaintext> + // element that contains an <a> element. + `<!doctype html><p><a><plaintext>b`: true, +} + +func TestNodeConsistency(t *testing.T) { + // inconsistentNode is a Node whose DataAtom and Data do not agree. + inconsistentNode := &Node{ + Type: ElementNode, + DataAtom: atom.Frameset, + Data: "table", + } + _, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode) + if err == nil { + t.Errorf("got nil error, want non-nil") + } +} + +func BenchmarkParser(b *testing.B) { + buf, err := ioutil.ReadFile("testdata/go1.html") + if err != nil { + b.Fatalf("could not read testdata/go1.html: %v", err) + } + b.SetBytes(int64(len(buf))) + runtime.GC() + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + mallocs := ms.Mallocs + b.ResetTimer() + for i := 0; i < b.N; i++ { + Parse(bytes.NewBuffer(buf)) + } + b.StopTimer() + runtime.ReadMemStats(&ms) + mallocs = ms.Mallocs - mallocs + b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N) } |