diff options
author | ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-12-13 19:16:27 +0000 |
---|---|---|
committer | ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-12-13 19:16:27 +0000 |
commit | 43eb1b72e5730064410a2d81e3f8d78ab62776cb (patch) | |
tree | c5132538d5da85ed816c7e1f9d93c4a503b838ab /libgo/go/html | |
parent | e27d80f7754f29f038c29ddcb2decd894d3e4aa4 (diff) | |
download | gcc-43eb1b72e5730064410a2d81e3f8d78ab62776cb.tar.gz |
libgo: Update to weekly.2011-12-02.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@182295 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgo/go/html')
-rw-r--r-- | libgo/go/html/doctype.go | 156 | ||||
-rw-r--r-- | libgo/go/html/parse.go | 169 | ||||
-rw-r--r-- | libgo/go/html/parse_test.go | 182 | ||||
-rw-r--r-- | libgo/go/html/render.go | 68 | ||||
-rw-r--r-- | libgo/go/html/template/clone_test.go | 32 | ||||
-rw-r--r-- | libgo/go/html/template/content.go | 12 | ||||
-rw-r--r-- | libgo/go/html/template/doc.go | 68 | ||||
-rw-r--r-- | libgo/go/html/template/escape.go | 51 | ||||
-rw-r--r-- | libgo/go/html/template/escape_test.go | 38 | ||||
-rw-r--r-- | libgo/go/html/template/template.go | 376 | ||||
-rw-r--r-- | libgo/go/html/token.go | 60 | ||||
-rw-r--r-- | libgo/go/html/token_test.go | 20 |
12 files changed, 840 insertions, 392 deletions
diff --git a/libgo/go/html/doctype.go b/libgo/go/html/doctype.go new file mode 100644 index 00000000000..f692061a551 --- /dev/null +++ b/libgo/go/html/doctype.go @@ -0,0 +1,156 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package html + +import ( + "strings" +) + +// parseDoctype parses the data from a DoctypeToken into a name, +// public identifier, and system identifier. It returns a Node whose Type +// is DoctypeNode, whose Data is the name, and which has attributes +// named "system" and "public" for the two identifiers if they were present. +// quirks is whether the document should be parsed in "quirks mode". +func parseDoctype(s string) (n *Node, quirks bool) { + n = &Node{Type: DoctypeNode} + + // Find the name. + space := strings.IndexAny(s, whitespace) + if space == -1 { + space = len(s) + } + n.Data = s[:space] + // The comparison to "html" is case-sensitive. + if n.Data != "html" { + quirks = true + } + n.Data = strings.ToLower(n.Data) + s = strings.TrimLeft(s[space:], whitespace) + + if len(s) < 6 { + // It can't start with "PUBLIC" or "SYSTEM". + // Ignore the rest of the string. + return n, quirks || s != "" + } + + key := strings.ToLower(s[:6]) + s = s[6:] + for key == "public" || key == "system" { + s = strings.TrimLeft(s, whitespace) + if s == "" { + break + } + quote := s[0] + if quote != '"' && quote != '\'' { + break + } + s = s[1:] + q := strings.IndexRune(s, rune(quote)) + var id string + if q == -1 { + id = s + s = "" + } else { + id = s[:q] + s = s[q+1:] + } + n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) + if key == "public" { + key = "system" + } else { + key = "" + } + } + + if key != "" || s != "" { + quirks = true + } else if len(n.Attr) > 0 { + if n.Attr[0].Key == "public" { + public := strings.ToLower(n.Attr[0].Val) + switch public { + case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html": + quirks = true + default: + for _, q := range quirkyIDs { + if strings.HasPrefix(public, q) { + quirks = true + break + } + } + } + // The following two public IDs only cause quirks mode if there is no system ID. + if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") || + strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) { + quirks = true + } + } + if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" && + strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" { + quirks = true + } + } + + return n, quirks +} + +// quirkyIDs is a list of public doctype identifiers that cause a document +// to be interpreted in quirks mode. The identifiers should be in lower case. +var quirkyIDs = []string{ + "+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//", +} diff --git a/libgo/go/html/parse.go b/libgo/go/html/parse.go index 9b7e934ac34..97fbc514d82 100644 --- a/libgo/go/html/parse.go +++ b/libgo/go/html/parse.go @@ -37,6 +37,11 @@ type parser struct { // fosterParenting is whether new elements should be inserted according to // the foster parenting rules (section 11.2.5.3). fosterParenting bool + // quirks is whether the parser is operating in "quirks mode." + quirks bool + // context is the context element when parsing an HTML fragment + // (section 11.4). + context *Node } func (p *parser) top() *Node { @@ -285,9 +290,10 @@ func (p *parser) setOriginalIM() { func (p *parser) resetInsertionMode() { for i := len(p.oe) - 1; i >= 0; i-- { n := p.oe[i] - if i == 0 { - // TODO: set n to the context element, for HTML fragment parsing. + if i == 0 && p.context != nil { + n = p.context } + switch n.Data { case "select": p.im = inSelectIM @@ -319,9 +325,17 @@ func (p *parser) resetInsertionMode() { p.im = inBodyIM } +const whitespace = " \t\r\n\f" + // Section 11.2.5.4.1. func initialIM(p *parser) bool { switch p.tok.Type { + case TextToken: + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } case CommentToken: p.doc.Add(&Node{ Type: CommentNode, @@ -329,15 +343,13 @@ func initialIM(p *parser) bool { }) return true case DoctypeToken: - p.doc.Add(&Node{ - Type: DoctypeNode, - Data: p.tok.Data, - }) + n, quirks := parseDoctype(p.tok.Data) + p.doc.Add(n) + p.quirks = quirks p.im = beforeHTMLIM return true } - // TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper, - // and so switching on "quirks mode" might belong in a different package. + p.quirks = true p.im = beforeHTMLIM return false } @@ -345,6 +357,12 @@ func initialIM(p *parser) bool { // Section 11.2.5.4.2. func beforeHTMLIM(p *parser) bool { switch p.tok.Type { + case TextToken: + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } case StartTagToken: if p.tok.Data == "html" { p.addElement(p.tok.Data, p.tok.Attr) @@ -383,7 +401,11 @@ func beforeHeadIM(p *parser) bool { case ErrorToken: implied = true case TextToken: - // TODO: distinguish whitespace text from others. + p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) + if len(p.tok.Data) == 0 { + // It was all whitespace, so ignore it. + return true + } implied = true case StartTagToken: switch p.tok.Data { @@ -417,8 +439,6 @@ func beforeHeadIM(p *parser) bool { return !implied } -const whitespace = " \t\r\n\f" - // Section 11.2.5.4.4. func inHeadIM(p *parser) bool { var ( @@ -441,6 +461,8 @@ func inHeadIM(p *parser) bool { implied = true case StartTagToken: switch p.tok.Data { + case "html": + return inBodyIM(p) case "base", "basefont", "bgsound", "command", "link", "meta": p.addElement(p.tok.Data, p.tok.Attr) p.oe.pop() @@ -450,6 +472,9 @@ func inHeadIM(p *parser) bool { p.setOriginalIM() p.im = textIM return true + case "head": + // Ignore the token. + return true default: implied = true } @@ -560,11 +585,30 @@ func copyAttributes(dst *Node, src Token) { func inBodyIM(p *parser) bool { switch p.tok.Type { case TextToken: + switch n := p.oe.top(); n.Data { + case "pre", "listing", "textarea": + if len(n.Child) == 0 { + // Ignore a newline at the start of a <pre> block. + d := p.tok.Data + if d != "" && d[0] == '\r' { + d = d[1:] + } + if d != "" && d[0] == '\n' { + d = d[1:] + } + if d == "" { + return true + } + p.tok.Data = d + } + } p.reconstructActiveFormattingElements() p.addText(p.tok.Data) p.framesetOK = false case StartTagToken: switch p.tok.Data { + case "html": + copyAttributes(p.oe[0], p.tok) case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul": p.popUntil(buttonScopeStopTags, "p") p.addElement(p.tok.Data, p.tok.Attr) @@ -589,6 +633,13 @@ func inBodyIM(p *parser) bool { case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u": p.reconstructActiveFormattingElements() p.addFormattingElement(p.tok.Data, p.tok.Attr) + case "nobr": + p.reconstructActiveFormattingElements() + if p.elementInScope(defaultScopeStopTags, "nobr") { + p.inBodyEndTagFormatting("nobr") + p.reconstructActiveFormattingElements() + } + p.addFormattingElement(p.tok.Data, p.tok.Attr) case "applet", "marquee", "object": p.reconstructActiveFormattingElements() p.addElement(p.tok.Data, p.tok.Attr) @@ -601,7 +652,9 @@ func inBodyIM(p *parser) bool { p.acknowledgeSelfClosingTag() p.framesetOK = false case "table": - p.popUntil(buttonScopeStopTags, "p") // TODO: skip this step in quirks mode. + if !p.quirks { + p.popUntil(buttonScopeStopTags, "p") + } p.addElement(p.tok.Data, p.tok.Attr) p.framesetOK = false p.im = inTableIM @@ -721,6 +774,11 @@ func inBodyIM(p *parser) bool { p.oe.pop() p.oe.pop() p.form = nil + case "xmp": + p.popUntil(buttonScopeStopTags, "p") + p.reconstructActiveFormattingElements() + p.framesetOK = false + p.addElement(p.tok.Data, p.tok.Attr) case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr": // Ignore the token. default: @@ -1462,18 +1520,7 @@ func afterAfterFramesetIM(p *parser) bool { return true } -// Parse returns the parse tree for the HTML from the given Reader. -// The input is assumed to be UTF-8 encoded. -func Parse(r io.Reader) (*Node, error) { - p := &parser{ - tokenizer: NewTokenizer(r), - doc: &Node{ - Type: DocumentNode, - }, - scripting: true, - framesetOK: true, - im: initialIM, - } +func (p *parser) parse() error { // Iterate until EOF. Any other error will cause an early return. consumed := true for { @@ -1482,7 +1529,7 @@ func Parse(r io.Reader) (*Node, error) { if err == io.EOF { break } - return nil, err + return err } } consumed = p.im(p) @@ -1493,5 +1540,77 @@ func Parse(r io.Reader) (*Node, error) { break } } + return nil +} + +// Parse returns the parse tree for the HTML from the given Reader. +// The input is assumed to be UTF-8 encoded. +func Parse(r io.Reader) (*Node, error) { + p := &parser{ + tokenizer: NewTokenizer(r), + doc: &Node{ + Type: DocumentNode, + }, + scripting: true, + framesetOK: true, + im: initialIM, + } + err := p.parse() + if err != nil { + return nil, err + } return p.doc, nil } + +// ParseFragment parses a fragment of HTML and returns the nodes that were +// found. If the fragment is the InnerHTML for an existing element, pass that +// element in context. +func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { + p := &parser{ + tokenizer: NewTokenizer(r), + doc: &Node{ + Type: DocumentNode, + }, + scripting: true, + context: context, + } + + if context != nil { + switch context.Data { + case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp": + p.tokenizer.rawTag = context.Data + } + } + + root := &Node{ + Type: ElementNode, + Data: "html", + } + p.doc.Add(root) + p.oe = nodeStack{root} + p.resetInsertionMode() + + for n := context; n != nil; n = n.Parent { + if n.Type == ElementNode && n.Data == "form" { + p.form = n + break + } + } + + err := p.parse() + if err != nil { + return nil, err + } + + parent := p.doc + if context != nil { + parent = root + } + + result := parent.Child + parent.Child = nil + for _, n := range result { + n.Parent = nil + } + return result, nil +} diff --git a/libgo/go/html/parse_test.go b/libgo/go/html/parse_test.go index 4f15ae1d554..e0c19cff6da 100644 --- a/libgo/go/html/parse_test.go +++ b/libgo/go/html/parse_test.go @@ -10,65 +10,77 @@ import ( "errors" "fmt" "io" - "io/ioutil" "os" "strings" "testing" ) -func pipeErr(err error) io.Reader { - pr, pw := io.Pipe() - pw.CloseWithError(err) - return pr -} - -func readDat(filename string, c chan io.Reader) { - defer close(c) - f, err := os.Open("testdata/webkit/" + filename) +// readParseTest reads a single test case from r. +func readParseTest(r *bufio.Reader) (text, want, context string, err error) { + line, err := r.ReadSlice('\n') if err != nil { - c <- pipeErr(err) - return + return "", "", "", err } - defer f.Close() + var b []byte - // Loop through the lines of the file. Each line beginning with "#" denotes - // a new section, which is returned as a separate io.Reader. - r := bufio.NewReader(f) - var pw *io.PipeWriter + // Read the HTML. + if string(line) != "#data\n" { + return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line) + } for { - line, err := r.ReadSlice('\n') + line, err = r.ReadSlice('\n') if err != nil { - if pw != nil { - pw.CloseWithError(err) - pw = nil - } else { - c <- pipeErr(err) - } - return + return "", "", "", err } - if len(line) == 0 { - continue + if line[0] == '#' { + break + } + b = append(b, line...) + } + text = strings.TrimRight(string(b), "\n") + b = b[:0] + + // Skip the error list. + if string(line) != "#errors\n" { + return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line) + } + for { + line, err = r.ReadSlice('\n') + if err != nil { + return "", "", "", err } if line[0] == '#' { - if pw != nil { - pw.Close() - } - var pr *io.PipeReader - pr, pw = io.Pipe() - c <- pr - continue + break + } + } + + if string(line) == "#document-fragment\n" { + line, err = r.ReadSlice('\n') + if err != nil { + return "", "", "", err } - if line[0] != '|' { - // Strip the trailing '\n'. - line = line[:len(line)-1] + context = strings.TrimSpace(string(line)) + line, err = r.ReadSlice('\n') + if err != nil { + return "", "", "", err } - if pw != nil { - if _, err := pw.Write(line); err != nil { - pw.CloseWithError(err) - pw = nil - } + } + + // Read the dump of what the parse tree should be. + if string(line) != "#document\n" { + return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line) + } + for { + line, err = r.ReadSlice('\n') + if err != nil && err != io.EOF { + return "", "", "", err + } + if len(line) == 0 || len(line) == 1 && line[0] == '\n' { + break } + b = append(b, line...) } + return text, string(b), context, nil } func dumpIndent(w io.Writer, level int) { @@ -93,11 +105,27 @@ func dumpLevel(w io.Writer, n *Node, level int) error { fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val) } case TextNode: - fmt.Fprintf(w, "%q", n.Data) + fmt.Fprintf(w, `"%s"`, n.Data) case CommentNode: fmt.Fprintf(w, "<!-- %s -->", n.Data) case DoctypeNode: - fmt.Fprintf(w, "<!DOCTYPE %s>", n.Data) + fmt.Fprintf(w, "<!DOCTYPE %s", n.Data) + if n.Attr != nil { + var p, s string + for _, a := range n.Attr { + switch a.Key { + case "public": + p = a.Val + case "system": + s = a.Val + } + } + if p != "" || s != "" { + fmt.Fprintf(w, ` "%s"`, p) + fmt.Fprintf(w, ` "%s"`, s) + } + } + io.WriteString(w, ">") case scopeMarkerNode: return errors.New("unexpected scopeMarkerNode") default: @@ -133,46 +161,62 @@ func TestParser(t *testing.T) { n int }{ // TODO(nigeltao): Process all the test cases from all the .dat files. + {"doctype01.dat", -1}, {"tests1.dat", -1}, - {"tests2.dat", 43}, - {"tests3.dat", 0}, + {"tests2.dat", -1}, + {"tests3.dat", -1}, + {"tests4.dat", -1}, + {"tests5.dat", -1}, } for _, tf := range testFiles { - rc := make(chan io.Reader) - go readDat(tf.filename, rc) + f, err := os.Open("testdata/webkit/" + tf.filename) + if err != nil { + t.Fatal(err) + } + defer f.Close() + r := bufio.NewReader(f) for i := 0; i != tf.n; i++ { - // Parse the #data section. - dataReader := <-rc - if dataReader == nil { + text, want, context, err := readParseTest(r) + if err == io.EOF && tf.n == -1 { break } - b, err := ioutil.ReadAll(dataReader) if err != nil { t.Fatal(err) } - text := string(b) - doc, err := Parse(strings.NewReader(text)) - if err != nil { - t.Fatal(err) + + var doc *Node + if context == "" { + doc, err = Parse(strings.NewReader(text)) + if err != nil { + t.Fatal(err) + } + } else { + contextNode := &Node{ + Type: ElementNode, + Data: context, + } + nodes, err := ParseFragment(strings.NewReader(text), contextNode) + if err != nil { + t.Fatal(err) + } + doc = &Node{ + Type: DocumentNode, + } + for _, n := range nodes { + doc.Add(n) + } } + got, err := dump(doc) if err != nil { t.Fatal(err) } - // Skip the #error section. - if _, err := io.Copy(ioutil.Discard, <-rc); err != nil { - t.Fatal(err) - } // Compare the parsed tree to the #document section. - b, err = ioutil.ReadAll(<-rc) - if err != nil { - t.Fatal(err) - } - if want := string(b); got != want { + if got != want { t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", tf.filename, i, text, got, want) continue } - if renderTestBlacklist[text] { + if renderTestBlacklist[text] || context != "" { continue } // Check that rendering and re-parsing results in an identical tree. @@ -193,12 +237,6 @@ func TestParser(t *testing.T) { continue } } - // Drain any untested cases for the test file. - for r := range rc { - if _, err := ioutil.ReadAll(r); err != nil { - t.Fatal(err) - } - } } } diff --git a/libgo/go/html/render.go b/libgo/go/html/render.go index 92c349fb32c..7e1a4669657 100644 --- a/libgo/go/html/render.go +++ b/libgo/go/html/render.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "io" + "strings" ) type writer interface { @@ -98,6 +99,40 @@ func render1(w writer, n *Node) error { if _, err := w.WriteString(n.Data); err != nil { return err } + if n.Attr != nil { + var p, s string + for _, a := range n.Attr { + switch a.Key { + case "public": + p = a.Val + case "system": + s = a.Val + } + } + if p != "" { + if _, err := w.WriteString(" PUBLIC "); err != nil { + return err + } + if err := writeQuoted(w, p); err != nil { + return err + } + if s != "" { + if err := w.WriteByte(' '); err != nil { + return err + } + if err := writeQuoted(w, s); err != nil { + return err + } + } + } else if s != "" { + if _, err := w.WriteString(" SYSTEM "); err != nil { + return err + } + if err := writeQuoted(w, s); err != nil { + return err + } + } + } return w.WriteByte('>') default: return errors.New("html: unknown node type") @@ -138,9 +173,19 @@ func render1(w writer, n *Node) error { return err } + // Add initial newline where there is danger of a newline beging ignored. + if len(n.Child) > 0 && n.Child[0].Type == TextNode && strings.HasPrefix(n.Child[0].Data, "\n") { + switch n.Data { + case "pre", "listing", "textarea": + if err := w.WriteByte('\n'); err != nil { + return err + } + } + } + // Render any child nodes. switch n.Data { - case "noembed", "noframes", "noscript", "plaintext", "script", "style": + case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": for _, c := range n.Child { if c.Type != TextNode { return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data) @@ -181,6 +226,27 @@ func render1(w writer, n *Node) error { return w.WriteByte('>') } +// writeQuoted writes s to w surrounded by quotes. Normally it will use double +// quotes, but if s contains a double quote, it will use single quotes. +// It is used for writing the identifiers in a doctype declaration. +// In valid HTML, they can't contain both types of quotes. +func writeQuoted(w writer, s string) error { + var q byte = '"' + if strings.Contains(s, `"`) { + q = '\'' + } + if err := w.WriteByte(q); err != nil { + return err + } + if _, err := w.WriteString(s); err != nil { + return err + } + if err := w.WriteByte(q); err != nil { + return err + } + return nil +} + // Section 13.1.2, "Elements", gives this list of void elements. Void elements // are those that can't have any contents. var voidElements = map[string]bool{ diff --git a/libgo/go/html/template/clone_test.go b/libgo/go/html/template/clone_test.go index ed1698acd8b..39788173b99 100644 --- a/libgo/go/html/template/clone_test.go +++ b/libgo/go/html/template/clone_test.go @@ -7,8 +7,6 @@ package template import ( "bytes" "testing" - "text/template" - "text/template/parse" ) func TestClone(t *testing.T) { @@ -48,15 +46,20 @@ func TestClone(t *testing.T) { } for _, test := range tests { - s := template.Must(template.New("s").Parse(test.input)) - d := template.New("d") - d.Tree = &parse.Tree{Name: d.Name(), Root: cloneList(s.Root)} + s, err := New("s").Parse(test.input) + if err != nil { + t.Errorf("input=%q: unexpected parse error %v", test.input, err) + } + + d, _ := New("d").Parse(test.input) + // Hack: just replace the root of the tree. + d.text.Root = cloneList(s.text.Root) - if want, got := s.Root.String(), d.Root.String(); want != got { + if want, got := s.text.Root.String(), d.text.Root.String(); want != got { t.Errorf("want %q, got %q", want, got) } - err := escape(d) + err = escapeTemplates(d, "d") if err != nil { t.Errorf("%q: failed to escape: %s", test.input, err) continue @@ -73,18 +76,17 @@ func TestClone(t *testing.T) { data := []string{"foo", "<bar>", "baz"} - // Make sure escaping d did not affect s. var b bytes.Buffer - s.Execute(&b, data) - if got := b.String(); got != test.want { - t.Errorf("%q: want %q, got %q", test.input, test.want, got) - continue + d.Execute(&b, data) + if got := b.String(); got != test.wantClone { + t.Errorf("input=%q: want %q, got %q", test.input, test.wantClone, got) } + // Make sure escaping d did not affect s. b.Reset() - d.Execute(&b, data) - if got := b.String(); got != test.wantClone { - t.Errorf("%q: want %q, got %q", test.input, test.wantClone, got) + s.text.Execute(&b, data) + if got := b.String(); got != test.want { + t.Errorf("input=%q: want %q, got %q", test.input, test.want, got) } } } diff --git a/libgo/go/html/template/content.go b/libgo/go/html/template/content.go index 3fb15a6e93f..4de7ccde912 100644 --- a/libgo/go/html/template/content.go +++ b/libgo/go/html/template/content.go @@ -12,10 +12,10 @@ import ( // Strings of content from a trusted source. type ( // CSS encapsulates known safe content that matches any of: - // (1) The CSS3 stylesheet production, such as `p { color: purple }`. - // (2) The CSS3 rule production, such as `a[href=~"https:"].foo#bar`. - // (3) CSS3 declaration productions, such as `color: red; margin: 2px`. - // (4) The CSS3 value production, such as `rgba(0, 0, 255, 127)`. + // 1. The CSS3 stylesheet production, such as `p { color: purple }`. + // 2. The CSS3 rule production, such as `a[href=~"https:"].foo#bar`. + // 3. CSS3 declaration productions, such as `color: red; margin: 2px`. + // 4. The CSS3 value production, such as `rgba(0, 0, 255, 127)`. // See http://www.w3.org/TR/css3-syntax/#style CSS string @@ -41,8 +41,8 @@ type ( // JSStr encapsulates a sequence of characters meant to be embedded // between quotes in a JavaScript expression. // The string must match a series of StringCharacters: - // StringCharacter :: SourceCharacter but not `\` or LineTerminator - // | EscapeSequence + // StringCharacter :: SourceCharacter but not `\` or LineTerminator + // | EscapeSequence // Note that LineContinuations are not allowed. // JSStr("foo\\nbar") is fine, but JSStr("foo\\\nbar") is not. JSStr string diff --git a/libgo/go/html/template/doc.go b/libgo/go/html/template/doc.go index 0324c9c0ee3..fc0e3826442 100644 --- a/libgo/go/html/template/doc.go +++ b/libgo/go/html/template/doc.go @@ -13,9 +13,9 @@ Introduction This package wraps package template so you can use the standard template API to parse and execute templates. - set, err := new(template.Set).Parse(...) - // Error checking elided - err = set.Execute(out, "Foo", data) + set, err := new(template.Set).Parse(...) + // Error checking elided + err = set.Execute(out, "Foo", data) If successful, set will now be injection-safe. Otherwise, err is an error defined in the docs for ErrorCode. @@ -29,25 +29,25 @@ trusted, while Execute's data parameter is not. More details are provided below. Example - import "template" - ... - t, err := (&template.Set{}).Parse(`{{define "T"}}Hello, {{.}}!{{end}}`) - err = t.Execute(out, "T", "<script>alert('you have been pwned')</script>") + import "text/template" + ... + t, err := (&template.Set{}).Parse(`{{define "T"}}Hello, {{.}}!{{end}}`) + err = t.Execute(out, "T", "<script>alert('you have been pwned')</script>") produces - Hello, <script>alert('you have been pwned')</script>! + Hello, <script>alert('you have been pwned')</script>! but with contextual autoescaping, - import "html/template" - ... - t, err := (&template.Set{}).Parse(`{{define "T"}}Hello, {{.}}!{{end}}`) - err = t.Execute(out, "T", "<script>alert('you have been pwned')</script>") + import "html/template" + ... + t, err := (&template.Set{}).Parse(`{{define "T"}}Hello, {{.}}!{{end}}`) + err = t.Execute(out, "T", "<script>alert('you have been pwned')</script>") produces safe, escaped HTML output - Hello, <script>alert('you have been pwned')</script>! + Hello, <script>alert('you have been pwned')</script>! Contexts @@ -80,36 +80,36 @@ Contexts Assuming {{.}} is `O'Reilly: How are <i>you</i>?`, the table below shows how {{.}} appears when used in the context to the left. -Context {{.}} After -{{.}} O'Reilly: How are <i>you</i>? -<a title='{{.}}'> O'Reilly: How are you? -<a href="/{{.}}"> O'Reilly: How are %3ci%3eyou%3c/i%3e? -<a href="?q={{.}}"> O'Reilly%3a%20How%20are%3ci%3e...%3f -<a onx='f("{{.}}")'> O\x27Reilly: How are \x3ci\x3eyou...? -<a onx='f({{.}})'> "O\x27Reilly: How are \x3ci\x3eyou...?" -<a onx='pattern = /{{.}}/;'> O\x27Reilly: How are \x3ci\x3eyou...\x3f + Context {{.}} After + {{.}} O'Reilly: How are <i>you</i>? + <a title='{{.}}'> O'Reilly: How are you? + <a href="/{{.}}"> O'Reilly: How are %3ci%3eyou%3c/i%3e? + <a href="?q={{.}}"> O'Reilly%3a%20How%20are%3ci%3e...%3f + <a onx='f("{{.}}")'> O\x27Reilly: How are \x3ci\x3eyou...? + <a onx='f({{.}})'> "O\x27Reilly: How are \x3ci\x3eyou...?" + <a onx='pattern = /{{.}}/;'> O\x27Reilly: How are \x3ci\x3eyou...\x3f If used in an unsafe context, then the value might be filtered out: -Context {{.}} After -<a href="{{.}}"> #ZgotmplZ + Context {{.}} After + <a href="{{.}}"> #ZgotmplZ since "O'Reilly:" is not an allowed protocol like "http:". If {{.}} is the innocuous word, `left`, then it can appear more widely, -Context {{.}} After -{{.}} left -<a title='{{.}}'> left -<a href='{{.}}'> left -<a href='/{{.}}'> left -<a href='?dir={{.}}'> left -<a style="border-{{.}}: 4px"> left -<a style="align: {{.}}"> left -<a style="background: '{{.}}'> left -<a style="background: url('{{.}}')> left -<style>p.{{.}} {color:red}</style> left + Context {{.}} After + {{.}} left + <a title='{{.}}'> left + <a href='{{.}}'> left + <a href='/{{.}}'> left + <a href='?dir={{.}}'> left + <a style="border-{{.}}: 4px"> left + <a style="align: {{.}}"> left + <a style="background: '{{.}}'> left + <a style="background: url('{{.}}')> left + <style>p.{{.}} {color:red}</style> left Non-string values can be used in JavaScript contexts. If {{.}} is diff --git a/libgo/go/html/template/escape.go b/libgo/go/html/template/escape.go index 8ac07eae24c..4a7a9354c93 100644 --- a/libgo/go/html/template/escape.go +++ b/libgo/go/html/template/escape.go @@ -12,24 +12,15 @@ import ( "text/template/parse" ) -// escape rewrites each action in the template to guarantee that the output is -// properly escaped. -func escape(t *template.Template) error { - var s template.Set - s.Add(t) - return escapeSet(&s, t.Name()) - // TODO: if s contains cloned dependencies due to self-recursion - // cross-context, error out. -} - -// escapeSet rewrites the template set to guarantee that the output of any of -// the named templates is properly escaped. -// Names should include the names of all templates that might be Executed but -// need not include helper templates. -// If no error is returned, then the named templates have been modified. -// Otherwise the named templates have been rendered unusable. -func escapeSet(s *template.Set, names ...string) error { - e := newEscaper(s) +// escapeTemplates rewrites the named templates, which must be +// associated with t, to guarantee that the output of any of the named +// templates is properly escaped. Names should include the names of +// all templates that might be Executed but need not include helper +// templates. If no error is returned, then the named templates have +// been modified. Otherwise the named templates have been rendered +// unusable. +func escapeTemplates(tmpl *Template, names ...string) error { + e := newEscaper(tmpl) for _, name := range names { c, _ := e.escapeTree(context{}, name, 0) var err error @@ -41,12 +32,13 @@ func escapeSet(s *template.Set, names ...string) error { if err != nil { // Prevent execution of unsafe templates. for _, name := range names { - if t := s.Template(name); t != nil { - t.Tree = nil + if t := tmpl.set[name]; t != nil { + t.text.Tree = nil } } return err } + tmpl.escaped = true } e.commit() return nil @@ -83,8 +75,7 @@ var equivEscapers = map[string]string{ // escaper collects type inferences about templates and changes needed to make // templates injection safe. type escaper struct { - // set is the template set being escaped. - set *template.Set + tmpl *Template // output[templateName] is the output context for a templateName that // has been mangled to include its input context. output map[string]context @@ -102,9 +93,9 @@ type escaper struct { } // newEscaper creates a blank escaper for the given set. -func newEscaper(s *template.Set) *escaper { +func newEscaper(t *Template) *escaper { return &escaper{ - s, + t, map[string]context{}, map[string]*template.Template{}, map[string]bool{}, @@ -442,7 +433,7 @@ func (e *escaper) escapeList(c context, n *parse.ListNode) context { // It returns the best guess at an output context, and the result of the filter // which is the same as whether e was updated. func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) { - e1 := newEscaper(e.set) + e1 := newEscaper(e.tmpl) // Make type inferences available to f. for k, v := range e.output { e1.output[k] = v @@ -501,7 +492,7 @@ func (e *escaper) escapeTree(c context, name string, line int) (context, string) }, dname } if dname != name { - // Use any template derived during an earlier call to escapeSet + // Use any template derived during an earlier call to escapeTemplate // with different top level templates, or clone if necessary. dt := e.template(dname) if dt == nil { @@ -529,7 +520,7 @@ func (e *escaper) computeOutCtx(c context, t *template.Template) context { if !ok && c1.state != stateError { return context{ state: stateError, - // TODO: Find the first node with a line in t.Tree.Root + // TODO: Find the first node with a line in t.text.Tree.Root err: errorf(ErrOutputContext, 0, "cannot compute output context for template %s", t.Name()), } } @@ -729,7 +720,9 @@ func (e *escaper) commit() { e.template(name).Funcs(funcMap) } for _, t := range e.derived { - e.set.Add(t) + if _, err := e.tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil { + panic("error adding derived template") + } } for n, s := range e.actionNodeEdits { ensurePipelineContains(n.Pipe, s) @@ -744,7 +737,7 @@ func (e *escaper) commit() { // template returns the named template given a mangled template name. func (e *escaper) template(name string) *template.Template { - t := e.set.Template(name) + t := e.tmpl.text.Lookup(name) if t == nil { t = e.derived[name] } diff --git a/libgo/go/html/template/escape_test.go b/libgo/go/html/template/escape_test.go index 4af583097bd..b4daca7d6bd 100644 --- a/libgo/go/html/template/escape_test.go +++ b/libgo/go/html/template/escape_test.go @@ -806,13 +806,15 @@ func TestEscapeSet(t *testing.T) { for name, body := range test.inputs { source += fmt.Sprintf("{{define %q}}%s{{end}} ", name, body) } - s := &Set{} - s.Funcs(fns) - s.Parse(source) + tmpl, err := New("root").Funcs(fns).Parse(source) + if err != nil { + t.Errorf("error parsing %q: %v", source, err) + continue + } var b bytes.Buffer - if err := s.Execute(&b, "main", data); err != nil { - t.Errorf("%q executing %v", err.Error(), s.Template("main")) + if err := tmpl.ExecuteTemplate(&b, "main", data); err != nil { + t.Errorf("%q executing %v", err.Error(), tmpl.Lookup("main")) continue } if got := b.String(); test.want != got { @@ -929,13 +931,13 @@ func TestErrors(t *testing.T) { "z:1: no such template foo", }, { - `{{define "z"}}<div{{template "y"}}>{{end}}` + + `<div{{template "y"}}>` + // Illegal starting in stateTag but not in stateText. `{{define "y"}} foo<b{{end}}`, `"<" in attribute name: " foo<b"`, }, { - `{{define "z"}}<script>reverseList = [{{template "t"}}]</script>{{end}}` + + `<script>reverseList = [{{template "t"}}]</script>` + // Missing " after recursive call. `{{define "t"}}{{if .Tail}}{{template "t" .Tail}}{{end}}{{.Head}}",{{end}}`, `: cannot compute output context for template t$htmltemplate_stateJS_elementScript`, @@ -967,21 +969,13 @@ func TestErrors(t *testing.T) { } for _, test := range tests { - var err error buf := new(bytes.Buffer) - if strings.HasPrefix(test.input, "{{define") { - var s *Set - s, err = (&Set{}).Parse(test.input) - if err == nil { - err = s.Execute(buf, "z", nil) - } - } else { - var t *Template - t, err = New("z").Parse(test.input) - if err == nil { - err = t.Execute(buf, nil) - } + tmpl, err := New("z").Parse(test.input) + if err != nil { + t.Errorf("input=%q: unexpected parse error %s\n", test.input, err) + continue } + err = tmpl.Execute(buf, nil) var got string if err != nil { got = err.Error() @@ -1569,11 +1563,11 @@ func TestEscapeErrorsNotIgnorable(t *testing.T) { func TestEscapeSetErrorsNotIgnorable(t *testing.T) { var b bytes.Buffer - s, err := (&Set{}).Parse(`{{define "t"}}<a{{end}}`) + tmpl, err := New("root").Parse(`{{define "t"}}<a{{end}}`) if err != nil { t.Errorf("failed to parse set: %q", err) } - err = s.Execute(&b, "t", nil) + err = tmpl.ExecuteTemplate(&b, "t", nil) if err == nil { t.Errorf("Expected error") } else if b.Len() != 0 { diff --git a/libgo/go/html/template/template.go b/libgo/go/html/template/template.go index 47334299384..f05ca190f73 100644 --- a/libgo/go/html/template/template.go +++ b/libgo/go/html/template/template.go @@ -7,233 +7,257 @@ package template import ( "fmt" "io" + "io/ioutil" "path/filepath" + "sync" "text/template" + "text/template/parse" ) -// Set is a specialized template.Set that produces a safe HTML document -// fragment. -type Set struct { - escaped map[string]bool - template.Set -} - // Template is a specialized template.Template that produces a safe HTML // document fragment. type Template struct { escaped bool - *template.Template + // We could embed the text/template field, but it's safer not to because + // we need to keep our version of the name space and the underlying + // template's in sync. + text *template.Template + *nameSpace // common to all associated templates } -// Execute applies the named template to the specified data object, writing -// the output to wr. -func (s *Set) Execute(wr io.Writer, name string, data interface{}) error { - if !s.escaped[name] { - if err := escapeSet(&s.Set, name); err != nil { - return err - } - if s.escaped == nil { - s.escaped = make(map[string]bool) +// nameSpace is the data structure shared by all templates in an association. +type nameSpace struct { + mu sync.Mutex + set map[string]*Template +} + +// Execute applies a parsed template to the specified data object, +// writing the output to wr. +func (t *Template) Execute(wr io.Writer, data interface{}) (err error) { + t.nameSpace.mu.Lock() + if !t.escaped { + if err = escapeTemplates(t, t.Name()); err != nil { + t.escaped = true } - s.escaped[name] = true } - return s.Set.Execute(wr, name, data) + t.nameSpace.mu.Unlock() + if err != nil { + return + } + return t.text.Execute(wr, data) } -// Parse parses a string into a set of named templates. Parse may be called -// multiple times for a given set, adding the templates defined in the string -// to the set. If a template is redefined, the element in the set is -// overwritten with the new definition. -func (set *Set) Parse(src string) (*Set, error) { - set.escaped = nil - s, err := set.Set.Parse(src) - if err != nil { - return nil, err +// ExecuteTemplate applies the template associated with t that has the given name +// to the specified data object and writes the output to wr. +func (t *Template) ExecuteTemplate(wr io.Writer, name string, data interface{}) (err error) { + t.nameSpace.mu.Lock() + tmpl := t.set[name] + if tmpl == nil { + t.nameSpace.mu.Unlock() + return fmt.Errorf("template: no template %q associated with template %q", name, t.Name()) } - if s != &(set.Set) { - panic("allocated new set") + if !tmpl.escaped { + err = escapeTemplates(tmpl, name) + } + t.nameSpace.mu.Unlock() + if err != nil { + return } - return set, nil + return tmpl.text.ExecuteTemplate(wr, name, data) } -// Parse parses the template definition string to construct an internal -// representation of the template for execution. -func (tmpl *Template) Parse(src string) (*Template, error) { - tmpl.escaped = false - t, err := tmpl.Template.Parse(src) +// Parse parses a string into a template. Nested template definitions +// will be associated with the top-level template t. Parse may be +// called multiple times to parse definitions of templates to associate +// with t. It is an error if a resulting template is non-empty (contains +// content other than template definitions) and would replace a +// non-empty template with the same name. (In multiple calls to Parse +// with the same receiver template, only one call can contain text +// other than space, comments, and template definitions.) +func (t *Template) Parse(src string) (*Template, error) { + t.nameSpace.mu.Lock() + t.escaped = false + t.nameSpace.mu.Unlock() + ret, err := t.text.Parse(src) if err != nil { return nil, err } - tmpl.Template = t - return tmpl, nil -} - -// Execute applies a parsed template to the specified data object, -// writing the output to wr. -func (t *Template) Execute(wr io.Writer, data interface{}) error { - if !t.escaped { - if err := escape(t.Template); err != nil { - return err + // In general, all the named templates might have changed underfoot. + // Regardless, some new ones may have been defined. + // The template.Template set has been updated; update ours. + t.nameSpace.mu.Lock() + defer t.nameSpace.mu.Unlock() + for _, v := range ret.Templates() { + name := v.Name() + tmpl := t.set[name] + if tmpl == nil { + tmpl = t.new(name) } - t.escaped = true + tmpl.escaped = false + tmpl.text = v } - return t.Template.Execute(wr, data) + return t, nil +} + +// AddParseTree is unimplemented. +func (t *Template) AddParseTree(name string, tree *parse.Tree) error { + return fmt.Errorf("html/template: AddParseTree unimplemented") +} + +// Clone is unimplemented. +func (t *Template) Clone(name string) error { + return fmt.Errorf("html/template: Add unimplemented") } // New allocates a new HTML template with the given name. func New(name string) *Template { - return &Template{false, template.New(name)} + tmpl := &Template{ + false, + template.New(name), + &nameSpace{ + set: make(map[string]*Template), + }, + } + tmpl.set[name] = tmpl + return tmpl } -// Must panics if err is non-nil in the same way as template.Must. -func Must(t *Template, err error) *Template { - t.Template = template.Must(t.Template, err) - return t +// New allocates a new HTML template associated with the given one +// and with the same delimiters. The association, which is transitive, +// allows one template to invoke another with a {{template}} action. +func (t *Template) New(name string) *Template { + t.nameSpace.mu.Lock() + defer t.nameSpace.mu.Unlock() + return t.new(name) } -// ParseFile creates a new Template and parses the template definition from -// the named file. The template name is the base name of the file. -func ParseFile(filename string) (*Template, error) { - t, err := template.ParseFile(filename) - if err != nil { - return nil, err +// new is the implementation of New, without the lock. +func (t *Template) new(name string) *Template { + tmpl := &Template{ + false, + t.text.New(name), + t.nameSpace, } - return &Template{false, t}, nil + tmpl.set[name] = tmpl + return tmpl } -// ParseFile reads the template definition from a file and parses it to -// construct an internal representation of the template for execution. -// The returned template will be nil if an error occurs. -func (tmpl *Template) ParseFile(filename string) (*Template, error) { - t, err := tmpl.Template.ParseFile(filename) - if err != nil { - return nil, err - } - tmpl.Template = t - return tmpl, nil +// Name returns the name of the template. +func (t *Template) Name() string { + return t.text.Name() } -// SetMust panics if the error is non-nil just like template.SetMust. -func SetMust(s *Set, err error) *Set { - if err != nil { - template.SetMust(&(s.Set), err) - } - return s +// Funcs adds the elements of the argument map to the template's function map. +// It panics if a value in the map is not a function with appropriate return +// type. However, it is legal to overwrite elements of the map. The return +// value is the template, so calls can be chained. +func (t *Template) Funcs(funcMap template.FuncMap) *Template { + t.text.Funcs(funcMap) + return t } -// ParseFiles parses the named files into a set of named templates. -// Each file must be parseable by itself. -// If an error occurs, parsing stops and the returned set is nil. -func (set *Set) ParseFiles(filenames ...string) (*Set, error) { - s, err := set.Set.ParseFiles(filenames...) - if err != nil { - return nil, err - } - if s != &(set.Set) { - panic("allocated new set") - } - return set, nil +// Delims sets the action delimiters to the specified strings, to be used in +// subsequent calls to Parse, ParseFiles, or ParseGlob. Nested template +// definitions will inherit the settings. An empty delimiter stands for the +// corresponding default: {{ or }}. +// The return value is the template, so calls can be chained. +func (t *Template) Delims(left, right string) *Template { + t.text.Delims(left, right) + return t } -// ParseSetFiles creates a new Set and parses the set definition from the -// named files. Each file must be individually parseable. -func ParseSetFiles(filenames ...string) (*Set, error) { - set := new(Set) - s, err := set.Set.ParseFiles(filenames...) - if err != nil { - return nil, err - } - if s != &(set.Set) { - panic("allocated new set") - } - return set, nil +// Lookup returns the template with the given name that is associated with t, +// or nil if there is no such template. +func (t *Template) Lookup(name string) *Template { + t.nameSpace.mu.Lock() + defer t.nameSpace.mu.Unlock() + return t.set[name] } -// ParseGlob parses the set definition from the files identified by the -// pattern. The pattern is processed by filepath.Glob and must match at -// least one file. -// If an error occurs, parsing stops and the returned set is nil. -func (s *Set) ParseGlob(pattern string) (*Set, error) { - filenames, err := filepath.Glob(pattern) - if err != nil { - return nil, err - } - if len(filenames) == 0 { - return nil, fmt.Errorf("pattern matches no files: %#q", pattern) - } - return s.ParseFiles(filenames...) +// Must panics if err is non-nil in the same way as template.Must. +func Must(t *Template, err error) *Template { + t.text = template.Must(t.text, err) + return t } -// ParseSetGlob creates a new Set and parses the set definition from the -// files identified by the pattern. The pattern is processed by filepath.Glob -// and must match at least one file. -func ParseSetGlob(pattern string) (*Set, error) { - set, err := new(Set).ParseGlob(pattern) - if err != nil { - return nil, err - } - return set, nil +// ParseFiles creates a new Template and parses the template definitions from +// the named files. The returned template's name will have the (base) name and +// (parsed) contents of the first file. There must be at least one file. +// If an error occurs, parsing stops and the returned *Template is nil. +func ParseFiles(filenames ...string) (*Template, error) { + return parseFiles(nil, filenames...) } -// Functions and methods to parse stand-alone template files into a set. +// ParseFiles parses the named files and associates the resulting templates with +// t. If an error occurs, parsing stops and the returned template is nil; +// otherwise it is t. There must be at least one file. +func (t *Template) ParseFiles(filenames ...string) (*Template, error) { + return parseFiles(t, filenames...) +} -// ParseTemplateFiles parses the named template files and adds them to the set -// in the same way as template.ParseTemplateFiles but ensures that templates -// with upper-case names are contextually-autoescaped. -func (set *Set) ParseTemplateFiles(filenames ...string) (*Set, error) { - s, err := set.Set.ParseTemplateFiles(filenames...) - if err != nil { - return nil, err +// parseFiles is the helper for the method and function. If the argument +// template is nil, it is created from the first file. +func parseFiles(t *Template, filenames ...string) (*Template, error) { + if len(filenames) == 0 { + // Not really a problem, but be consistent. + return nil, fmt.Errorf("template: no files named in call to ParseFiles") } - if s != &(set.Set) { - panic("new set allocated") + for _, filename := range filenames { + b, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + s := string(b) + name := filepath.Base(filename) + // First template becomes return value if not already defined, + // and we use that one for subsequent New calls to associate + // all the templates together. Also, if this file has the same name + // as t, this file becomes the contents of t, so + // t, err := New(name).Funcs(xxx).ParseFiles(name) + // works. Otherwise we create a new template associated with t. + var tmpl *Template + if t == nil { + t = New(name) + } + if name == t.Name() { + tmpl = t + } else { + tmpl = t.New(name) + } + _, err = tmpl.Parse(s) + if err != nil { + return nil, err + } } - return set, nil -} - -// ParseTemplateGlob parses the template files matched by the -// patern and adds them to the set. Each template will be named -// the base name of its file. -// Unlike with ParseGlob, each file should be a stand-alone template -// definition suitable for Template.Parse (not Set.Parse); that is, the -// file does not contain {{define}} clauses. ParseTemplateGlob is -// therefore equivalent to calling the ParseFile function to create -// individual templates, which are then added to the set. -// Each file must be parseable by itself. -// If an error occurs, parsing stops and the returned set is nil. -func (s *Set) ParseTemplateGlob(pattern string) (*Set, error) { + return t, nil +} + +// ParseGlob creates a new Template and parses the template definitions from the +// files identified by the pattern, which must match at least one file. The +// returned template will have the (base) name and (parsed) contents of the +// first file matched by the pattern. ParseGlob is equivalent to calling +// ParseFiles with the list of files matched by the pattern. +func ParseGlob(pattern string) (*Template, error) { + return parseGlob(nil, pattern) +} + +// ParseGlob parses the template definitions in the files identified by the +// pattern and associates the resulting templates with t. The pattern is +// processed by filepath.Glob and must match at least one file. ParseGlob is +// equivalent to calling t.ParseFiles with the list of files matched by the +// pattern. +func (t *Template) ParseGlob(pattern string) (*Template, error) { + return parseGlob(t, pattern) +} + +// parseGlob is the implementation of the function and method ParseGlob. +func parseGlob(t *Template, pattern string) (*Template, error) { filenames, err := filepath.Glob(pattern) if err != nil { return nil, err } - return s.ParseTemplateFiles(filenames...) -} - -// ParseTemplateFiles creates a set by parsing the named files, -// each of which defines a single template. Each template will be -// named the base name of its file. -// Unlike with ParseFiles, each file should be a stand-alone template -// definition suitable for Template.Parse (not Set.Parse); that is, the -// file does not contain {{define}} clauses. ParseTemplateFiles is -// therefore equivalent to calling the ParseFile function to create -// individual templates, which are then added to the set. -// Each file must be parseable by itself. Parsing stops if an error is -// encountered. -func ParseTemplateFiles(filenames ...string) (*Set, error) { - return new(Set).ParseTemplateFiles(filenames...) -} - -// ParseTemplateGlob creates a set by parsing the files matched -// by the pattern, each of which defines a single template. The pattern -// is processed by filepath.Glob and must match at least one file. Each -// template will be named the base name of its file. -// Unlike with ParseGlob, each file should be a stand-alone template -// definition suitable for Template.Parse (not Set.Parse); that is, the -// file does not contain {{define}} clauses. ParseTemplateGlob is -// therefore equivalent to calling the ParseFile function to create -// individual templates, which are then added to the set. -// Each file must be parseable by itself. Parsing stops if an error is -// encountered. -func ParseTemplateGlob(pattern string) (*Set, error) { - return new(Set).ParseTemplateGlob(pattern) + if len(filenames) == 0 { + return nil, fmt.Errorf("template: pattern matches no files: %#q", pattern) + } + return parseFiles(t, filenames...) } diff --git a/libgo/go/html/token.go b/libgo/go/html/token.go index 9400873e6b8..69af96840c2 100644 --- a/libgo/go/html/token.go +++ b/libgo/go/html/token.go @@ -289,7 +289,11 @@ func (z *Tokenizer) readComment() { for dashCount := 2; ; { c := z.readByte() if z.err != nil { - z.data.end = z.raw.end + // Ignore up to two dashes at EOF. + if dashCount > 2 { + dashCount = 2 + } + z.data.end = z.raw.end - dashCount return } switch c { @@ -375,6 +379,28 @@ func (z *Tokenizer) readMarkupDeclaration() TokenType { return DoctypeToken } +// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end] +// case-insensitively matches any element of ss. +func (z *Tokenizer) startTagIn(ss ...string) bool { +loop: + for _, s := range ss { + if z.data.end-z.data.start != len(s) { + continue loop + } + for i := 0; i < len(s); i++ { + c := z.buf[z.data.start+i] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + if c != s[i] { + continue loop + } + } + return true + } + return false +} + // readStartTag reads the next start tag token. The opening "<a" has already // been consumed, where 'a' means anything in [A-Za-z]. func (z *Tokenizer) readStartTag() TokenType { @@ -401,17 +427,27 @@ func (z *Tokenizer) readStartTag() TokenType { break } } - // Any "<noembed>", "<noframes>", "<noscript>", "<plaintext", "<script>", "<style>", - // "<textarea>" or "<title>" tag flags the tokenizer's next token as raw. - // The tag name lengths of these special cases ranges in [5, 9]. - if x := z.data.end - z.data.start; 5 <= x && x <= 9 { - switch z.buf[z.data.start] { - case 'n', 'p', 's', 't', 'N', 'P', 'S', 'T': - switch s := strings.ToLower(string(z.buf[z.data.start:z.data.end])); s { - case "noembed", "noframes", "noscript", "plaintext", "script", "style", "textarea", "title": - z.rawTag = s - } - } + // Several tags flag the tokenizer's next token as raw. + c, raw := z.buf[z.data.start], false + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + switch c { + case 'i': + raw = z.startTagIn("iframe") + case 'n': + raw = z.startTagIn("noembed", "noframes", "noscript") + case 'p': + raw = z.startTagIn("plaintext") + case 's': + raw = z.startTagIn("script", "style") + case 't': + raw = z.startTagIn("textarea", "title") + case 'x': + raw = z.startTagIn("xmp") + } + if raw { + z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end])) } // Look for a self-closing token like "<br/>". if z.err == nil && z.buf[z.raw.end-2] == '/' { diff --git a/libgo/go/html/token_test.go b/libgo/go/html/token_test.go index 61d4e67c06d..672d60c4209 100644 --- a/libgo/go/html/token_test.go +++ b/libgo/go/html/token_test.go @@ -325,6 +325,26 @@ var tokenTests = []tokenTest{ }, { "comment9", + "a<!--z-", + "a$<!--z-->", + }, + { + "comment10", + "a<!--z--", + "a$<!--z-->", + }, + { + "comment11", + "a<!--z---", + "a$<!--z--->", + }, + { + "comment12", + "a<!--z----", + "a$<!--z---->", + }, + { + "comment13", "a<!--x--!>z", "a$<!--x-->$z", }, |