diff options
author | ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-12-12 23:40:51 +0000 |
---|---|---|
committer | ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-12-12 23:40:51 +0000 |
commit | 46bc1fe2f5a3c7ee4273b99d700fecf1555f64e0 (patch) | |
tree | 0c68629fac9d7c6f103b401c9063ef00ed259f06 /libgo/go/html | |
parent | dc15d51eb56c0efe84bf82b02ac2ba9a231ce69b (diff) | |
download | gcc-46bc1fe2f5a3c7ee4273b99d700fecf1555f64e0.tar.gz |
libgo: Update to weekly.2011-11-18.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@182266 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgo/go/html')
-rw-r--r-- | libgo/go/html/doc.go | 4 | ||||
-rw-r--r-- | libgo/go/html/parse.go | 590 | ||||
-rw-r--r-- | libgo/go/html/parse_test.go | 8 | ||||
-rw-r--r-- | libgo/go/html/render.go | 25 | ||||
-rw-r--r-- | libgo/go/html/template/content.go | 21 | ||||
-rw-r--r-- | libgo/go/html/template/escape_test.go | 35 | ||||
-rw-r--r-- | libgo/go/html/template/js.go | 20 | ||||
-rw-r--r-- | libgo/go/html/token.go | 26 |
8 files changed, 556 insertions, 173 deletions
diff --git a/libgo/go/html/doc.go b/libgo/go/html/doc.go index 1bea690c2c8..56b194ffb90 100644 --- a/libgo/go/html/doc.go +++ b/libgo/go/html/doc.go @@ -37,7 +37,7 @@ lower-cased, and attributes are collected into a []Attribute. For example: for { if z.Next() == html.ErrorToken { // Returning io.EOF indicates success. - return z.Error() + return z.Err() } emitToken(z.Token()) } @@ -51,7 +51,7 @@ call to Next. For example, to extract an HTML page's anchor text: tt := z.Next() switch tt { case ErrorToken: - return z.Error() + return z.Err() case TextToken: if depth > 0 { // emitBytes should copy the []byte it receives, diff --git a/libgo/go/html/parse.go b/libgo/go/html/parse.go index f47d4ea147c..9b7e934ac34 100644 --- a/libgo/go/html/parse.go +++ b/libgo/go/html/parse.go @@ -29,6 +29,8 @@ type parser struct { head, form *Node // Other parsing state flags (section 11.2.3.5). scripting, framesetOK bool + // im is the current insertion mode. + im insertionMode // originalIM is the insertion mode to go back to after completing a text // or inTableText insertion mode. originalIM insertionMode @@ -265,37 +267,22 @@ func (p *parser) acknowledgeSelfClosingTag() { // An insertion mode (section 11.2.3.1) is the state transition function from // a particular state in the HTML5 parser's state machine. It updates the -// parser's fields depending on parser.token (where ErrorToken means EOF). In -// addition to returning the next insertionMode state, it also returns whether -// the token was consumed. -type insertionMode func(*parser) (insertionMode, bool) - -// useTheRulesFor runs the delegate insertionMode over p, returning the actual -// insertionMode unless the delegate caused a state transition. -// Section 11.2.3.1, "using the rules for". -func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, bool) { - im, consumed := delegate(p) - if p.originalIM == delegate { - p.originalIM = actual - } - if im != delegate { - return im, consumed - } - return actual, consumed -} +// parser's fields depending on parser.tok (where ErrorToken means EOF). +// It returns whether the token was consumed. +type insertionMode func(*parser) bool // setOriginalIM sets the insertion mode to return to after completing a text or // inTableText insertion mode. // Section 11.2.3.1, "using the rules for". -func (p *parser) setOriginalIM(im insertionMode) { +func (p *parser) setOriginalIM() { if p.originalIM != nil { panic("html: bad parser state: originalIM was set twice") } - p.originalIM = im + p.originalIM = p.im } // Section 11.2.3.1, "reset the insertion mode". -func (p *parser) resetInsertionMode() insertionMode { +func (p *parser) resetInsertionMode() { for i := len(p.oe) - 1; i >= 0; i-- { n := p.oe[i] if i == 0 { @@ -303,95 +290,90 @@ func (p *parser) resetInsertionMode() insertionMode { } switch n.Data { case "select": - return inSelectIM + p.im = inSelectIM case "td", "th": - return inCellIM + p.im = inCellIM case "tr": - return inRowIM + p.im = inRowIM case "tbody", "thead", "tfoot": - return inTableBodyIM + p.im = inTableBodyIM case "caption": - // TODO: return inCaptionIM + p.im = inCaptionIM case "colgroup": - // TODO: return inColumnGroupIM + p.im = inColumnGroupIM case "table": - return inTableIM + p.im = inTableIM case "head": - return inBodyIM + p.im = inBodyIM case "body": - return inBodyIM + p.im = inBodyIM case "frameset": - // TODO: return inFramesetIM + p.im = inFramesetIM case "html": - return beforeHeadIM + p.im = beforeHeadIM + default: + continue } + return } - return inBodyIM + p.im = inBodyIM } // Section 11.2.5.4.1. -func initialIM(p *parser) (insertionMode, bool) { +func initialIM(p *parser) bool { switch p.tok.Type { case CommentToken: p.doc.Add(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return initialIM, true + return true case DoctypeToken: p.doc.Add(&Node{ Type: DoctypeNode, Data: p.tok.Data, }) - return beforeHTMLIM, true + p.im = beforeHTMLIM + return true } // TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper, // and so switching on "quirks mode" might belong in a different package. - return beforeHTMLIM, false + p.im = beforeHTMLIM + return false } // Section 11.2.5.4.2. -func beforeHTMLIM(p *parser) (insertionMode, bool) { - var ( - add bool - attr []Attribute - implied bool - ) +func beforeHTMLIM(p *parser) bool { switch p.tok.Type { - case ErrorToken: - implied = true - case TextToken: - // TODO: distinguish whitespace text from others. - implied = true case StartTagToken: if p.tok.Data == "html" { - add = true - attr = p.tok.Attr - } else { - implied = true + p.addElement(p.tok.Data, p.tok.Attr) + p.im = beforeHeadIM + return true } case EndTagToken: switch p.tok.Data { case "head", "body", "html", "br": - implied = true + // Drop down to creating an implied <html> tag. default: // Ignore the token. + return true } case CommentToken: p.doc.Add(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return beforeHTMLIM, true - } - if add || implied { - p.addElement("html", attr) + return true } - return beforeHeadIM, !implied + // Create an implied <html> tag. + p.addElement("html", nil) + p.im = beforeHeadIM + return false } // Section 11.2.5.4.3. -func beforeHeadIM(p *parser) (insertionMode, bool) { +func beforeHeadIM(p *parser) bool { var ( add bool attr []Attribute @@ -409,7 +391,7 @@ func beforeHeadIM(p *parser) (insertionMode, bool) { add = true attr = p.tok.Attr case "html": - return useTheRulesFor(p, beforeHeadIM, inBodyIM) + return inBodyIM(p) default: implied = true } @@ -425,19 +407,20 @@ func beforeHeadIM(p *parser) (insertionMode, bool) { Type: CommentNode, Data: p.tok.Data, }) - return beforeHeadIM, true + return true } if add || implied { p.addElement("head", attr) p.head = p.top() } - return inHeadIM, !implied + p.im = inHeadIM + return !implied } const whitespace = " \t\r\n\f" // Section 11.2.5.4.4. -func inHeadIM(p *parser) (insertionMode, bool) { +func inHeadIM(p *parser) bool { var ( pop bool implied bool @@ -451,7 +434,7 @@ func inHeadIM(p *parser) (insertionMode, bool) { // Add the initial whitespace to the current node. p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) if s == "" { - return inHeadIM, true + return true } p.tok.Data = s } @@ -464,35 +447,42 @@ func inHeadIM(p *parser) (insertionMode, bool) { p.acknowledgeSelfClosingTag() case "script", "title", "noscript", "noframes", "style": p.addElement(p.tok.Data, p.tok.Attr) - p.setOriginalIM(inHeadIM) - return textIM, true + p.setOriginalIM() + p.im = textIM + return true default: implied = true } case EndTagToken: - if p.tok.Data == "head" { + switch p.tok.Data { + case "head": pop = true + case "body", "html", "br": + implied = true + default: + // Ignore the token. + return true } - // TODO. case CommentToken: p.addChild(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return inHeadIM, true + return true } if pop || implied { n := p.oe.pop() if n.Data != "head" { panic("html: bad parser state: <head> element not found, in the in-head insertion mode") } - return afterHeadIM, !implied + p.im = afterHeadIM + return !implied } - return inHeadIM, true + return true } // Section 11.2.5.4.6. -func afterHeadIM(p *parser) (insertionMode, bool) { +func afterHeadIM(p *parser) bool { var ( add bool attr []Attribute @@ -512,11 +502,13 @@ func afterHeadIM(p *parser) (insertionMode, bool) { attr = p.tok.Attr framesetOK = false case "frameset": - // TODO. + p.addElement(p.tok.Data, p.tok.Attr) + p.im = inFramesetIM + return true case "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title": p.oe = append(p.oe, p.head) defer p.oe.pop() - return useTheRulesFor(p, afterHeadIM, inHeadIM) + return inHeadIM(p) case "head": // TODO. default: @@ -524,19 +516,27 @@ func afterHeadIM(p *parser) (insertionMode, bool) { framesetOK = true } case EndTagToken: - // TODO. + switch p.tok.Data { + case "body", "html", "br": + implied = true + framesetOK = true + default: + // Ignore the token. + return true + } case CommentToken: p.addChild(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return afterHeadIM, true + return true } if add || implied { p.addElement("body", attr) p.framesetOK = framesetOK } - return inBodyIM, !implied + p.im = inBodyIM + return !implied } // copyAttributes copies attributes of src not found on dst to dst. @@ -557,7 +557,7 @@ func copyAttributes(dst *Node, src Token) { } // Section 11.2.5.4.7. -func inBodyIM(p *parser) (insertionMode, bool) { +func inBodyIM(p *parser) bool { switch p.tok.Type { case TextToken: p.reconstructActiveFormattingElements() @@ -604,7 +604,8 @@ func inBodyIM(p *parser) (insertionMode, bool) { p.popUntil(buttonScopeStopTags, "p") // TODO: skip this step in quirks mode. p.addElement(p.tok.Data, p.tok.Attr) p.framesetOK = false - return inTableIM, true + p.im = inTableIM + return true case "hr": p.popUntil(buttonScopeStopTags, "p") p.addElement(p.tok.Data, p.tok.Attr) @@ -616,7 +617,14 @@ func inBodyIM(p *parser) (insertionMode, bool) { p.addElement(p.tok.Data, p.tok.Attr) p.framesetOK = false // TODO: detect <select> inside a table. - return inSelectIM, true + p.im = inSelectIM + return true + case "form": + if p.form == nil { + p.popUntil(buttonScopeStopTags, "p") + p.addElement(p.tok.Data, p.tok.Attr) + p.form = p.top() + } case "li": p.framesetOK = false for i := len(p.oe) - 1; i >= 0; i-- { @@ -634,7 +642,28 @@ func inBodyIM(p *parser) (insertionMode, bool) { break } p.popUntil(buttonScopeStopTags, "p") - p.addElement("li", p.tok.Attr) + p.addElement(p.tok.Data, p.tok.Attr) + case "dd", "dt": + p.framesetOK = false + for i := len(p.oe) - 1; i >= 0; i-- { + node := p.oe[i] + switch node.Data { + case "dd", "dt": + p.oe = p.oe[:i] + case "address", "div", "p": + continue + default: + if !isSpecialElement[node.Data] { + continue + } + } + break + } + p.popUntil(buttonScopeStopTags, "p") + p.addElement(p.tok.Data, p.tok.Attr) + case "plaintext": + p.popUntil(buttonScopeStopTags, "p") + p.addElement(p.tok.Data, p.tok.Attr) case "optgroup", "option": if p.top().Data == "option" { p.oe.pop() @@ -650,10 +679,50 @@ func inBodyIM(p *parser) (insertionMode, bool) { } } case "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title": - return useTheRulesFor(p, inBodyIM, inHeadIM) + return inHeadIM(p) case "image": p.tok.Data = "img" - return inBodyIM, false + return false + case "isindex": + if p.form != nil { + // Ignore the token. + return true + } + action := "" + prompt := "This is a searchable index. Enter search keywords: " + attr := []Attribute{{Key: "name", Val: "isindex"}} + for _, a := range p.tok.Attr { + switch a.Key { + case "action": + action = a.Val + case "name": + // Ignore the attribute. + case "prompt": + prompt = a.Val + default: + attr = append(attr, a) + } + } + p.acknowledgeSelfClosingTag() + p.popUntil(buttonScopeStopTags, "p") + p.addElement("form", nil) + p.form = p.top() + if action != "" { + p.form.Attr = []Attribute{{Key: "action", Val: action}} + } + p.addElement("hr", nil) + p.oe.pop() + p.addElement("label", nil) + p.addText(prompt) + p.addElement("input", attr) + p.oe.pop() + p.oe.pop() + p.addElement("hr", nil) + p.oe.pop() + p.oe.pop() + p.form = nil + case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr": + // Ignore the token. default: // TODO. p.addElement(p.tok.Data, p.tok.Attr) @@ -662,7 +731,8 @@ func inBodyIM(p *parser) (insertionMode, bool) { switch p.tok.Data { case "body": // TODO: autoclose the stack of open elements. - return afterBodyIM, true + p.im = afterBodyIM + return true case "p": if !p.elementInScope(buttonScopeStopTags, "p") { p.addElement("p", nil) @@ -676,6 +746,9 @@ func inBodyIM(p *parser) (insertionMode, bool) { if p.popUntil(defaultScopeStopTags, p.tok.Data) { p.clearActiveFormattingElements() } + case "br": + p.tok.Type = StartTagToken + return false default: p.inBodyEndTagOther(p.tok.Data) } @@ -686,7 +759,7 @@ func inBodyIM(p *parser) (insertionMode, bool) { }) } - return inBodyIM, true + return true } func (p *parser) inBodyEndTagFormatting(tag string) { @@ -827,45 +900,64 @@ func (p *parser) inBodyEndTagOther(tag string) { } // Section 11.2.5.4.8. -func textIM(p *parser) (insertionMode, bool) { +func textIM(p *parser) bool { switch p.tok.Type { case ErrorToken: p.oe.pop() case TextToken: p.addText(p.tok.Data) - return textIM, true + return true case EndTagToken: p.oe.pop() } - o := p.originalIM + p.im = p.originalIM p.originalIM = nil - return o, p.tok.Type == EndTagToken + return p.tok.Type == EndTagToken } // Section 11.2.5.4.9. -func inTableIM(p *parser) (insertionMode, bool) { +func inTableIM(p *parser) bool { switch p.tok.Type { case ErrorToken: // Stop parsing. - return nil, true + return true case TextToken: // TODO. case StartTagToken: switch p.tok.Data { + case "caption": + p.clearStackToContext(tableScopeStopTags) + p.afe = append(p.afe, &scopeMarker) + p.addElement(p.tok.Data, p.tok.Attr) + p.im = inCaptionIM + return true case "tbody", "tfoot", "thead": p.clearStackToContext(tableScopeStopTags) p.addElement(p.tok.Data, p.tok.Attr) - return inTableBodyIM, true + p.im = inTableBodyIM + return true case "td", "th", "tr": p.clearStackToContext(tableScopeStopTags) p.addElement("tbody", nil) - return inTableBodyIM, false + p.im = inTableBodyIM + return false case "table": if p.popUntil(tableScopeStopTags, "table") { - return p.resetInsertionMode(), false + p.resetInsertionMode() + return false } // Ignore the token. - return inTableIM, true + return true + case "colgroup": + p.clearStackToContext(tableScopeStopTags) + p.addElement(p.tok.Data, p.tok.Attr) + p.im = inColumnGroupIM + return true + case "col": + p.clearStackToContext(tableScopeStopTags) + p.addElement("colgroup", p.tok.Attr) + p.im = inColumnGroupIM + return false default: // TODO. } @@ -873,20 +965,21 @@ func inTableIM(p *parser) (insertionMode, bool) { switch p.tok.Data { case "table": if p.popUntil(tableScopeStopTags, "table") { - return p.resetInsertionMode(), true + p.resetInsertionMode() + return true } // Ignore the token. - return inTableIM, true + return true case "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr": // Ignore the token. - return inTableIM, true + return true } case CommentToken: p.addChild(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return inTableIM, true + return true } switch p.top().Data { @@ -895,7 +988,7 @@ func inTableIM(p *parser) (insertionMode, bool) { defer func() { p.fosterParenting = false }() } - return useTheRulesFor(p, inTableIM, inBodyIM) + return inBodyIM(p) } // clearStackToContext pops elements off the stack of open elements @@ -911,8 +1004,90 @@ func (p *parser) clearStackToContext(stopTags []string) { } } +// Section 11.2.5.4.11. +func inCaptionIM(p *parser) bool { + switch p.tok.Type { + case StartTagToken: + switch p.tok.Data { + case "caption", "col", "colgroup", "tbody", "td", "tfoot", "thead", "tr": + if p.popUntil(tableScopeStopTags, "caption") { + p.clearActiveFormattingElements() + p.im = inTableIM + return false + } else { + // Ignore the token. + return true + } + } + case EndTagToken: + switch p.tok.Data { + case "caption": + if p.popUntil(tableScopeStopTags, "caption") { + p.clearActiveFormattingElements() + p.im = inTableIM + } + return true + case "table": + if p.popUntil(tableScopeStopTags, "caption") { + p.clearActiveFormattingElements() + p.im = inTableIM + return false + } else { + // Ignore the token. + return true + } + case "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr": + // Ignore the token. + return true + } + } + return inBodyIM(p) +} + +// Section 11.2.5.4.12. +func inColumnGroupIM(p *parser) bool { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + return true + case DoctypeToken: + // Ignore the token. + return true + case StartTagToken: + switch p.tok.Data { + case "html": + return inBodyIM(p) + case "col": + p.addElement(p.tok.Data, p.tok.Attr) + p.oe.pop() + p.acknowledgeSelfClosingTag() + return true + } + case EndTagToken: + switch p.tok.Data { + case "colgroup": + if p.oe.top().Data != "html" { + p.oe.pop() + } + p.im = inTableIM + return true + case "col": + // Ignore the token. + return true + } + } + if p.oe.top().Data != "html" { + p.oe.pop() + } + p.im = inTableIM + return false +} + // Section 11.2.5.4.13. -func inTableBodyIM(p *parser) (insertionMode, bool) { +func inTableBodyIM(p *parser) bool { var ( add bool data string @@ -942,31 +1117,33 @@ func inTableBodyIM(p *parser) (insertionMode, bool) { switch p.tok.Data { case "table": if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") { - return inTableIM, false + p.im = inTableIM + return false } // Ignore the token. - return inTableBodyIM, true + return true case "body", "caption", "col", "colgroup", "html", "td", "th", "tr": // Ignore the token. - return inTableBodyIM, true + return true } case CommentToken: p.addChild(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return inTableBodyIM, true + return true } if add { // TODO: clear the stack back to a table body context. p.addElement(data, attr) - return inRowIM, consumed + p.im = inRowIM + return consumed } - return useTheRulesFor(p, inTableBodyIM, inTableIM) + return inTableIM(p) } // Section 11.2.5.4.14. -func inRowIM(p *parser) (insertionMode, bool) { +func inRowIM(p *parser) bool { switch p.tok.Type { case ErrorToken: // TODO. @@ -978,13 +1155,15 @@ func inRowIM(p *parser) (insertionMode, bool) { p.clearStackToContext(tableRowContextStopTags) p.addElement(p.tok.Data, p.tok.Attr) p.afe = append(p.afe, &scopeMarker) - return inCellIM, true + p.im = inCellIM + return true case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr": if p.popUntil(tableScopeStopTags, "tr") { - return inTableBodyIM, false + p.im = inTableBodyIM + return false } // Ignore the token. - return inRowIM, true + return true default: // TODO. } @@ -992,21 +1171,23 @@ func inRowIM(p *parser) (insertionMode, bool) { switch p.tok.Data { case "tr": if p.popUntil(tableScopeStopTags, "tr") { - return inTableBodyIM, true + p.im = inTableBodyIM + return true } // Ignore the token. - return inRowIM, true + return true case "table": if p.popUntil(tableScopeStopTags, "tr") { - return inTableBodyIM, false + p.im = inTableBodyIM + return false } // Ignore the token. - return inRowIM, true + return true case "tbody", "tfoot", "thead": // TODO. case "body", "caption", "col", "colgroup", "html", "td", "th": // Ignore the token. - return inRowIM, true + return true default: // TODO. } @@ -1015,13 +1196,13 @@ func inRowIM(p *parser) (insertionMode, bool) { Type: CommentNode, Data: p.tok.Data, }) - return inRowIM, true + return true } - return useTheRulesFor(p, inRowIM, inTableIM) + return inTableIM(p) } // Section 11.2.5.4.15. -func inCellIM(p *parser) (insertionMode, bool) { +func inCellIM(p *parser) bool { var ( closeTheCellAndReprocess bool ) @@ -1037,10 +1218,11 @@ func inCellIM(p *parser) (insertionMode, bool) { case "td", "th": if !p.popUntil(tableScopeStopTags, p.tok.Data) { // Ignore the token. - return inCellIM, true + return true } p.clearActiveFormattingElements() - return inRowIM, true + p.im = inRowIM + return true case "body", "caption", "col", "colgroup", "html": // TODO. case "table", "tbody", "tfoot", "thead", "tr": @@ -1052,19 +1234,20 @@ func inCellIM(p *parser) (insertionMode, bool) { Type: CommentNode, Data: p.tok.Data, }) - return inCellIM, true + return true } if closeTheCellAndReprocess { if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") { p.clearActiveFormattingElements() - return inRowIM, false + p.im = inRowIM + return false } } - return useTheRulesFor(p, inCellIM, inBodyIM) + return inBodyIM(p) } // Section 11.2.5.4.16. -func inSelectIM(p *parser) (insertionMode, bool) { +func inSelectIM(p *parser) bool { endSelect := false switch p.tok.Type { case ErrorToken: @@ -1081,7 +1264,13 @@ func inSelectIM(p *parser) (insertionMode, bool) { } p.addElement(p.tok.Data, p.tok.Attr) case "optgroup": - // TODO. + if p.top().Data == "option" { + p.oe.pop() + } + if p.top().Data == "optgroup" { + p.oe.pop() + } + p.addElement(p.tok.Data, p.tok.Attr) case "select": endSelect = true case "input", "keygen", "textarea": @@ -1094,9 +1283,17 @@ func inSelectIM(p *parser) (insertionMode, bool) { case EndTagToken: switch p.tok.Data { case "option": - // TODO. + if p.top().Data == "option" { + p.oe.pop() + } case "optgroup": - // TODO. + i := len(p.oe) - 1 + if p.oe[i].Data == "option" { + i-- + } + if p.oe[i].Data == "optgroup" { + p.oe = p.oe[:i] + } case "select": endSelect = true default: @@ -1113,34 +1310,33 @@ func inSelectIM(p *parser) (insertionMode, bool) { switch p.oe[i].Data { case "select": p.oe = p.oe[:i] - return p.resetInsertionMode(), true + p.resetInsertionMode() + return true case "option", "optgroup": continue default: // Ignore the token. - return inSelectIM, true + return true } } } - return inSelectIM, true + return true } // Section 11.2.5.4.18. -func afterBodyIM(p *parser) (insertionMode, bool) { +func afterBodyIM(p *parser) bool { switch p.tok.Type { case ErrorToken: - // TODO. - case TextToken: - // TODO. + // Stop parsing. + return true case StartTagToken: - // TODO. + if p.tok.Data == "html" { + return inBodyIM(p) + } case EndTagToken: - switch p.tok.Data { - case "html": - // TODO: autoclose the stack of open elements. - return afterAfterBodyIM, true - default: - // TODO. + if p.tok.Data == "html" { + p.im = afterAfterBodyIM + return true } case CommentToken: // The comment is attached to the <html> element. @@ -1151,32 +1347,119 @@ func afterBodyIM(p *parser) (insertionMode, bool) { Type: CommentNode, Data: p.tok.Data, }) - return afterBodyIM, true + return true + } + p.im = inBodyIM + return false +} + +// Section 11.2.5.4.19. +func inFramesetIM(p *parser) bool { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case StartTagToken: + switch p.tok.Data { + case "html": + return inBodyIM(p) + case "frameset": + p.addElement(p.tok.Data, p.tok.Attr) + case "frame": + p.addElement(p.tok.Data, p.tok.Attr) + p.oe.pop() + p.acknowledgeSelfClosingTag() + case "noframes": + return inHeadIM(p) + } + case EndTagToken: + switch p.tok.Data { + case "frameset": + if p.oe.top().Data != "html" { + p.oe.pop() + if p.oe.top().Data != "frameset" { + p.im = afterFramesetIM + return true + } + } + } + default: + // Ignore the token. + } + return true +} + +// Section 11.2.5.4.20. +func afterFramesetIM(p *parser) bool { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case StartTagToken: + switch p.tok.Data { + case "html": + return inBodyIM(p) + case "noframes": + return inHeadIM(p) + } + case EndTagToken: + switch p.tok.Data { + case "html": + p.im = afterAfterFramesetIM + return true + } + default: + // Ignore the token. } - // TODO: should this be "return inBodyIM, true"? - return afterBodyIM, true + return true } // Section 11.2.5.4.21. -func afterAfterBodyIM(p *parser) (insertionMode, bool) { +func afterAfterBodyIM(p *parser) bool { switch p.tok.Type { case ErrorToken: // Stop parsing. - return nil, true + return true case TextToken: // TODO. case StartTagToken: if p.tok.Data == "html" { - return useTheRulesFor(p, afterAfterBodyIM, inBodyIM) + return inBodyIM(p) } case CommentToken: p.doc.Add(&Node{ Type: CommentNode, Data: p.tok.Data, }) - return afterAfterBodyIM, true + return true + } + p.im = inBodyIM + return false +} + +// Section 11.2.5.4.22. +func afterAfterFramesetIM(p *parser) bool { + switch p.tok.Type { + case CommentToken: + p.addChild(&Node{ + Type: CommentNode, + Data: p.tok.Data, + }) + case StartTagToken: + switch p.tok.Data { + case "html": + return inBodyIM(p) + case "noframes": + return inHeadIM(p) + } + default: + // Ignore the token. } - return inBodyIM, false + return true } // Parse returns the parse tree for the HTML from the given Reader. @@ -1189,9 +1472,10 @@ func Parse(r io.Reader) (*Node, error) { }, scripting: true, framesetOK: true, + im: initialIM, } // Iterate until EOF. Any other error will cause an early return. - im, consumed := initialIM, true + consumed := true for { if consumed { if err := p.read(); err != nil { @@ -1201,11 +1485,11 @@ func Parse(r io.Reader) (*Node, error) { return nil, err } } - im, consumed = im(p) + consumed = p.im(p) } // Loop until the final token (the ErrorToken signifying EOF) is consumed. for { - if im, consumed = im(p); consumed { + if consumed = p.im(p); consumed { break } } diff --git a/libgo/go/html/parse_test.go b/libgo/go/html/parse_test.go index 27979225b33..4f15ae1d554 100644 --- a/libgo/go/html/parse_test.go +++ b/libgo/go/html/parse_test.go @@ -133,8 +133,8 @@ func TestParser(t *testing.T) { n int }{ // TODO(nigeltao): Process all the test cases from all the .dat files. - {"tests1.dat", 92}, - {"tests2.dat", 0}, + {"tests1.dat", -1}, + {"tests2.dat", 43}, {"tests3.dat", 0}, } for _, tf := range testFiles { @@ -213,4 +213,8 @@ var renderTestBlacklist = map[string]bool{ // More cases of <a> being reparented: `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true, `<a><table><a></table><p><a><div><a>`: true, + `<a><table><td><a><table></table><a></tr><a></table><a>`: true, + // A <plaintext> element is reparented, putting it before a table. + // A <plaintext> element can't have anything after it in HTML. + `<table><plaintext><td>`: true, } diff --git a/libgo/go/html/render.go b/libgo/go/html/render.go index c815f35f1e1..92c349fb32c 100644 --- a/libgo/go/html/render.go +++ b/libgo/go/html/render.go @@ -52,7 +52,19 @@ func Render(w io.Writer, n *Node) error { return buf.Flush() } +// plaintextAbort is returned from render1 when a <plaintext> element +// has been rendered. No more end tags should be rendered after that. +var plaintextAbort = errors.New("html: internal error (plaintext abort)") + func render(w writer, n *Node) error { + err := render1(w, n) + if err == plaintextAbort { + err = nil + } + return err +} + +func render1(w writer, n *Node) error { // Render non-element nodes; these are the easy cases. switch n.Type { case ErrorNode: @@ -61,7 +73,7 @@ func render(w writer, n *Node) error { return escape(w, n.Data) case DocumentNode: for _, c := range n.Child { - if err := render(w, c); err != nil { + if err := render1(w, c); err != nil { return err } } @@ -128,7 +140,7 @@ func render(w writer, n *Node) error { // Render any child nodes. switch n.Data { - case "noembed", "noframes", "noscript", "script", "style": + case "noembed", "noframes", "noscript", "plaintext", "script", "style": for _, c := range n.Child { if c.Type != TextNode { return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data) @@ -137,18 +149,23 @@ func render(w writer, n *Node) error { return err } } + if n.Data == "plaintext" { + // Don't render anything else. <plaintext> must be the + // last element in the file, with no closing tag. + return plaintextAbort + } case "textarea", "title": for _, c := range n.Child { if c.Type != TextNode { return fmt.Errorf("html: RCDATA element <%s> has non-text child node", n.Data) } - if err := render(w, c); err != nil { + if err := render1(w, c); err != nil { return err } } default: for _, c := range n.Child { - if err := render(w, c); err != nil { + if err := render1(w, c); err != nil { return err } } diff --git a/libgo/go/html/template/content.go b/libgo/go/html/template/content.go index d720d4ba689..3fb15a6e93f 100644 --- a/libgo/go/html/template/content.go +++ b/libgo/go/html/template/content.go @@ -6,6 +6,7 @@ package template import ( "fmt" + "reflect" ) // Strings of content from a trusted source. @@ -70,10 +71,25 @@ const ( contentTypeUnsafe ) +// indirect returns the value, after dereferencing as many times +// as necessary to reach the base type (or nil). +func indirect(a interface{}) interface{} { + if t := reflect.TypeOf(a); t.Kind() != reflect.Ptr { + // Avoid creating a reflect.Value if it's not a pointer. + return a + } + v := reflect.ValueOf(a) + for v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + return v.Interface() +} + // stringify converts its arguments to a string and the type of the content. +// All pointers are dereferenced, as in the text/template package. func stringify(args ...interface{}) (string, contentType) { if len(args) == 1 { - switch s := args[0].(type) { + switch s := indirect(args[0]).(type) { case string: return s, contentTypePlain case CSS: @@ -90,5 +106,8 @@ func stringify(args ...interface{}) (string, contentType) { return string(s), contentTypeURL } } + for i, arg := range args { + args[i] = indirect(arg) + } return fmt.Sprint(args...), contentTypePlain } diff --git a/libgo/go/html/template/escape_test.go b/libgo/go/html/template/escape_test.go index d8bfa321121..4af583097bd 100644 --- a/libgo/go/html/template/escape_test.go +++ b/libgo/go/html/template/escape_test.go @@ -28,7 +28,7 @@ func (x *goodMarshaler) MarshalJSON() ([]byte, error) { } func TestEscape(t *testing.T) { - var data = struct { + data := struct { F, T bool C, G, H string A, E []string @@ -50,6 +50,7 @@ func TestEscape(t *testing.T) { Z: nil, W: HTML(`¡<b class="foo">Hello</b>, <textarea>O'World</textarea>!`), } + pdata := &data tests := []struct { name string @@ -668,6 +669,15 @@ func TestEscape(t *testing.T) { t.Errorf("%s: escaped output: want\n\t%q\ngot\n\t%q", test.name, w, g) continue } + b.Reset() + if err := tmpl.Execute(b, pdata); err != nil { + t.Errorf("%s: template execution failed for pointer: %s", test.name, err) + continue + } + if w, g := test.output, b.String(); w != g { + t.Errorf("%s: escaped output for pointer: want\n\t%q\ngot\n\t%q", test.name, w, g) + continue + } } } @@ -1605,6 +1615,29 @@ func TestRedundantFuncs(t *testing.T) { } } +func TestIndirectPrint(t *testing.T) { + a := 3 + ap := &a + b := "hello" + bp := &b + bpp := &bp + tmpl := Must(New("t").Parse(`{{.}}`)) + var buf bytes.Buffer + err := tmpl.Execute(&buf, ap) + if err != nil { + t.Errorf("Unexpected error: %s", err) + } else if buf.String() != "3" { + t.Errorf(`Expected "3"; got %q`, buf.String()) + } + buf.Reset() + err = tmpl.Execute(&buf, bpp) + if err != nil { + t.Errorf("Unexpected error: %s", err) + } else if buf.String() != "hello" { + t.Errorf(`Expected "hello"; got %q`, buf.String()) + } +} + func BenchmarkEscapedExecute(b *testing.B) { tmpl := Must(New("t").Parse(`<a onclick="alert('{{.}}')">{{.}}</a>`)) var buf bytes.Buffer diff --git a/libgo/go/html/template/js.go b/libgo/go/html/template/js.go index 68c53e5ca3b..0e632df4220 100644 --- a/libgo/go/html/template/js.go +++ b/libgo/go/html/template/js.go @@ -8,6 +8,7 @@ import ( "bytes" "encoding/json" "fmt" + "reflect" "strings" "unicode/utf8" ) @@ -117,12 +118,24 @@ var regexpPrecederKeywords = map[string]bool{ "void": true, } +var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() + +// indirectToJSONMarshaler returns the value, after dereferencing as many times +// as necessary to reach the base type (or nil) or an implementation of json.Marshal. +func indirectToJSONMarshaler(a interface{}) interface{} { + v := reflect.ValueOf(a) + for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() { + v = v.Elem() + } + return v.Interface() +} + // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has -// nether side-effects nor free variables outside (NaN, Infinity). +// neither side-effects nor free variables outside (NaN, Infinity). func jsValEscaper(args ...interface{}) string { var a interface{} if len(args) == 1 { - a = args[0] + a = indirectToJSONMarshaler(args[0]) switch t := a.(type) { case JS: return string(t) @@ -135,6 +148,9 @@ func jsValEscaper(args ...interface{}) string { a = t.String() } } else { + for i, arg := range args { + args[i] = indirectToJSONMarshaler(arg) + } a = fmt.Sprint(args...) } // TODO: detect cycles before calling Marshal which loops infinitely on diff --git a/libgo/go/html/token.go b/libgo/go/html/token.go index 2c138227b10..9400873e6b8 100644 --- a/libgo/go/html/token.go +++ b/libgo/go/html/token.go @@ -401,14 +401,14 @@ func (z *Tokenizer) readStartTag() TokenType { break } } - // Any "<noembed>", "<noframes>", "<noscript>", "<script>", "<style>", + // Any "<noembed>", "<noframes>", "<noscript>", "<plaintext", "<script>", "<style>", // "<textarea>" or "<title>" tag flags the tokenizer's next token as raw. - // The tag name lengths of these special cases ranges in [5, 8]. - if x := z.data.end - z.data.start; 5 <= x && x <= 8 { + // The tag name lengths of these special cases ranges in [5, 9]. + if x := z.data.end - z.data.start; 5 <= x && x <= 9 { switch z.buf[z.data.start] { - case 'n', 's', 't', 'N', 'S', 'T': + case 'n', 'p', 's', 't', 'N', 'P', 'S', 'T': switch s := strings.ToLower(string(z.buf[z.data.start:z.data.end])); s { - case "noembed", "noframes", "noscript", "script", "style", "textarea", "title": + case "noembed", "noframes", "noscript", "plaintext", "script", "style", "textarea", "title": z.rawTag = s } } @@ -551,9 +551,19 @@ func (z *Tokenizer) Next() TokenType { z.data.start = z.raw.end z.data.end = z.raw.end if z.rawTag != "" { - z.readRawOrRCDATA() - z.tt = TextToken - return z.tt + if z.rawTag == "plaintext" { + // Read everything up to EOF. + for z.err == nil { + z.readByte() + } + z.textIsRaw = true + } else { + z.readRawOrRCDATA() + } + if z.data.end > z.data.start { + z.tt = TextToken + return z.tt + } } z.textIsRaw = false |