summaryrefslogtreecommitdiff
path: root/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/html/parser/HTMLTreeBuilder.cpp')
-rw-r--r--Source/WebCore/html/parser/HTMLTreeBuilder.cpp2841
1 files changed, 2841 insertions, 0 deletions
diff --git a/Source/WebCore/html/parser/HTMLTreeBuilder.cpp b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
new file mode 100644
index 000000000..09e0a8e10
--- /dev/null
+++ b/Source/WebCore/html/parser/HTMLTreeBuilder.cpp
@@ -0,0 +1,2841 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ * Copyright (C) 2011 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLTreeBuilder.h"
+
+#include "Comment.h"
+#include "DOMWindow.h"
+#include "DocumentFragment.h"
+#include "DocumentType.h"
+#include "Frame.h"
+#include "HTMLDocument.h"
+#include "HTMLDocumentParser.h"
+#include "HTMLElementFactory.h"
+#include "HTMLFormElement.h"
+#include "HTMLHtmlElement.h"
+#include "HTMLNames.h"
+#include "HTMLParserIdioms.h"
+#include "HTMLScriptElement.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "LocalizedStrings.h"
+#include "MathMLNames.h"
+#include "NotImplemented.h"
+#include "SVGNames.h"
+#include "Text.h"
+#include "XLinkNames.h"
+#include "XMLNSNames.h"
+#include "XMLNames.h"
+#include <wtf/unicode/CharacterNames.h>
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+static const int uninitializedLineNumberValue = -1; // Presumably a "no line number recorded" sentinel; nothing in the visible part of this file reads it -- TODO confirm against the rest of the file.
+
+// Builds the placeholder TextPosition that m_scriptToProcessStartPosition
+// holds at construction time and after takeScriptToProcess() resets it.
+static TextPosition uninitializedPositionValue1()
+{
+    const OrdinalNumber placeholderOrdinal = OrdinalNumber::fromOneBasedInt(-1);
+    const OrdinalNumber firstOrdinal = OrdinalNumber::first();
+    return TextPosition(placeholderOrdinal, firstOrdinal);
+}
+
+namespace {
+
+// True when |character| is either the replacement character or something
+// isHTMLSpace() accepts.
+inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
+{
+    if (character == replacementCharacter)
+        return true;
+    return isHTMLSpace(character);
+}
+
+// Reports whether every character of |string| satisfies isHTMLSpace().
+inline bool isAllWhitespace(const String& string)
+{
+    const bool onlyHTMLSpaces = string.isAllSpecialCharacters<isHTMLSpace>();
+    return onlyHTMLSpaces;
+}
+
+// Reports whether every character of |string| satisfies
+// isHTMLSpaceOrReplacementCharacter().
+inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
+{
+    const bool onlySpacesOrReplacements = string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
+    return onlySpacesOrReplacements;
+}
+
+// True for the six numbered heading tag names, h1 through h6.
+bool isNumberedHeaderTag(const AtomicString& tagName)
+{
+    if (tagName == h1Tag || tagName == h2Tag || tagName == h3Tag)
+        return true;
+    return tagName == h4Tag || tagName == h5Tag || tagName == h6Tag;
+}
+
+// True when |tagName| is caption, col or colgroup.
+bool isCaptionColOrColgroupTag(const AtomicString& tagName)
+{
+    if (tagName == captionTag)
+        return true;
+    if (tagName == colTag)
+        return true;
+    return tagName == colgroupTag;
+}
+
+// True when |tagName| names a table cell: th or td.
+bool isTableCellContextTag(const AtomicString& tagName)
+{
+    if (tagName == thTag)
+        return true;
+    return tagName == tdTag;
+}
+
+// True when |tagName| is tbody, tfoot or thead.
+bool isTableBodyContextTag(const AtomicString& tagName)
+{
+    if (tagName == tbodyTag)
+        return true;
+    if (tagName == tfootTag)
+        return true;
+    return tagName == theadTag;
+}
+
+// Decides whether |node| counts as "special" for the tree builder, following
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
+// |node| must not be null.
+bool isSpecialNode(Node* node)
+{
+    // MathML: mi, mo, mn, ms, mtext and annotation-xml.
+    const bool isSpecialMathML = node->hasTagName(MathMLNames::miTag)
+        || node->hasTagName(MathMLNames::moTag)
+        || node->hasTagName(MathMLNames::mnTag)
+        || node->hasTagName(MathMLNames::msTag)
+        || node->hasTagName(MathMLNames::mtextTag)
+        || node->hasTagName(MathMLNames::annotation_xmlTag);
+    if (isSpecialMathML)
+        return true;
+
+    // SVG: foreignObject, desc and title.
+    const bool isSpecialSVG = node->hasTagName(SVGNames::foreignObjectTag)
+        || node->hasTagName(SVGNames::descTag)
+        || node->hasTagName(SVGNames::titleTag);
+    if (isSpecialSVG)
+        return true;
+
+    // A document fragment is treated as special as well.
+    if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
+        return true;
+
+    // Everything below only applies to elements in the HTML namespace.
+    if (!isInHTMLNamespace(node))
+        return false;
+
+    const AtomicString& localName = node->localName();
+    return localName == addressTag
+        || localName == appletTag
+        || localName == areaTag
+        || localName == articleTag
+        || localName == asideTag
+        || localName == baseTag
+        || localName == basefontTag
+        || localName == bgsoundTag
+        || localName == blockquoteTag
+        || localName == bodyTag
+        || localName == brTag
+        || localName == buttonTag
+        || localName == captionTag
+        || localName == centerTag
+        || localName == colTag
+        || localName == colgroupTag
+        || localName == commandTag
+        || localName == ddTag
+        || localName == detailsTag
+        || localName == dirTag
+        || localName == divTag
+        || localName == dlTag
+        || localName == dtTag
+        || localName == embedTag
+        || localName == fieldsetTag
+        || localName == figcaptionTag
+        || localName == figureTag
+        || localName == footerTag
+        || localName == formTag
+        || localName == frameTag
+        || localName == framesetTag
+        || isNumberedHeaderTag(localName)
+        || localName == headTag
+        || localName == headerTag
+        || localName == hgroupTag
+        || localName == hrTag
+        || localName == htmlTag
+        || localName == iframeTag
+        || localName == imgTag
+        || localName == inputTag
+        || localName == isindexTag
+        || localName == liTag
+        || localName == linkTag
+        || localName == listingTag
+        || localName == marqueeTag
+        || localName == menuTag
+        || localName == metaTag
+        || localName == navTag
+        || localName == noembedTag
+        || localName == noframesTag
+        || localName == noscriptTag
+        || localName == objectTag
+        || localName == olTag
+        || localName == pTag
+        || localName == paramTag
+        || localName == plaintextTag
+        || localName == preTag
+        || localName == scriptTag
+        || localName == sectionTag
+        || localName == selectTag
+        || localName == styleTag
+        || localName == summaryTag
+        || localName == tableTag
+        || isTableBodyContextTag(localName)
+        || localName == tdTag
+        || localName == textareaTag
+        || localName == thTag
+        || localName == titleTag
+        || localName == trTag
+        || localName == ulTag
+        || localName == wbrTag
+        || localName == xmpTag;
+}
+
+// True for the formatting tag names other than a and nobr:
+// b, big, code, em, font, i, s, small, strike, strong, tt and u.
+bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == bTag || tagName == bigTag || tagName == codeTag || tagName == emTag)
+        return true;
+    if (tagName == fontTag || tagName == iTag || tagName == sTag || tagName == smallTag)
+        return true;
+    return tagName == strikeTag || tagName == strongTag || tagName == ttTag || tagName == uTag;
+}
+
+// True for nobr and for every tag accepted by
+// isNonAnchorNonNobrFormattingTag().
+bool isNonAnchorFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == nobrTag)
+        return true;
+    return isNonAnchorNonNobrFormattingTag(tagName);
+}
+
+// True for every formatting tag name: a plus the non-anchor set. See
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
+bool isFormattingTag(const AtomicString& tagName)
+{
+    if (tagName == aTag)
+        return true;
+    return isNonAnchorFormattingTag(tagName);
+}
+
+// Starting at |element| itself, climbs the chain of element parents and
+// returns the first <form> encountered. Returns 0 when |element| is null or
+// when the climb reaches a missing or non-element parent first.
+HTMLFormElement* closestFormAncestor(Element* element)
+{
+    for (Element* candidate = element; candidate; ) {
+        if (candidate->hasTagName(formTag))
+            return static_cast<HTMLFormElement*>(candidate);
+
+        ContainerNode* container = candidate->parentNode();
+        const bool parentIsElement = container && container->isElementNode();
+        if (!parentIsElement)
+            return 0;
+        candidate = static_cast<Element*>(container);
+    }
+    return 0;
+}
+
+} // namespace
+
+// A consuming cursor over a run of UChars that lives elsewhere (in a
+// character token or in a String). Only two pointers are stored. The run
+// must be non-empty at construction and fully consumed by destruction;
+// both conditions are checked with ASSERT.
+class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
+    WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
+public:
+    // Views the characters carried by |token|.
+    explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
+        : m_current(token.characters().data())
+        , m_end(m_current + token.characters().size())
+    {
+        ASSERT(!isEmpty());
+    }
+
+    // Views the characters of |string|.
+    explicit ExternalCharacterTokenBuffer(const String& string)
+        : m_current(string.characters())
+        , m_end(m_current + string.length())
+    {
+        ASSERT(!isEmpty());
+    }
+
+    ~ExternalCharacterTokenBuffer()
+    {
+        ASSERT(isEmpty());
+    }
+
+    // True once every character has been consumed.
+    bool isEmpty() const { return m_end == m_current; }
+
+    // Drops the first character if, and only if, it is a line feed.
+    void skipAtMostOneLeadingNewline()
+    {
+        ASSERT(!isEmpty());
+        if (*m_current != '\n')
+            return;
+        ++m_current;
+    }
+
+    // Drops the leading run of HTML space characters.
+    void skipLeadingWhitespace()
+    {
+        skipLeading<isHTMLSpace>();
+    }
+
+    // Removes and returns the leading run of HTML space characters; the
+    // result is the null string when that run is empty.
+    String takeLeadingWhitespace()
+    {
+        return takeLeading<isHTMLSpace>();
+    }
+
+    // Drops the leading run of characters that are not HTML spaces.
+    void skipLeadingNonWhitespace()
+    {
+        skipLeading<isNotHTMLSpace>();
+    }
+
+    // Removes and returns everything that is left.
+    String takeRemaining()
+    {
+        ASSERT(!isEmpty());
+        const UChar* const begin = m_current;
+        m_current = m_end;
+        return String(begin, m_end - begin);
+    }
+
+    // Appends everything that is left to |recipient| and empties the buffer.
+    void giveRemainingTo(StringBuilder& recipient)
+    {
+        const UChar* const rest = m_current;
+        m_current = m_end;
+        recipient.append(rest, m_end - rest);
+    }
+
+    // Consumes the rest of the buffer and returns only its HTML space
+    // characters, in order.
+    String takeRemainingWhitespace()
+    {
+        ASSERT(!isEmpty());
+        Vector<UChar> spaces;
+        do {
+            const UChar character = *m_current;
+            ++m_current;
+            if (isHTMLSpace(character))
+                spaces.append(character);
+        } while (m_current < m_end);
+
+        // With no whitespace at all, hand back the null string rather than
+        // an empty one: the caller should insert no text node, as opposed to
+        // inserting an empty text node.
+        if (!spaces.isEmpty())
+            return String::adopt(spaces);
+        return String();
+    }
+
+private:
+    // Advances past leading characters for which |characterPredicate| holds,
+    // stopping at the first rejected character or at the end of the run.
+    template<bool characterPredicate(UChar)>
+    void skipLeading()
+    {
+        ASSERT(!isEmpty());
+        do {
+            if (!characterPredicate(*m_current))
+                return;
+        } while (++m_current != m_end);
+    }
+
+    // Like skipLeading(), but returns the skipped characters (the null
+    // string when nothing was skipped).
+    template<bool characterPredicate(UChar)>
+    String takeLeading()
+    {
+        ASSERT(!isEmpty());
+        const UChar* const begin = m_current;
+        skipLeading<characterPredicate>();
+        return begin == m_current ? String() : String(begin, m_current - begin);
+    }
+
+    const UChar* m_current;
+    const UChar* m_end;
+};
+
+
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth) // Whole-document variant: the construction site is built directly on |document|.
+ : m_framesetOk(true) // Start tags handled later in this file (pre, button, table, ...) clear this flag.
+ , m_document(document)
+ , m_tree(document, maximumDOMTreeDepth)
+ , m_reportErrors(reportErrors)
+ , m_isPaused(false)
+ , m_insertionMode(InitialMode) // Both insertion-mode members start at InitialMode.
+ , m_originalInsertionMode(InitialMode)
+ , m_shouldSkipLeadingNewline(false)
+ , m_parser(parser)
+ , m_scriptToProcessStartPosition(uninitializedPositionValue1()) // Placeholder position; takeScriptToProcess() restores this same value.
+ , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
+ , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
+{
+}
+
+// Fragment variant: parses into |fragment| using |contextElement| as the
+// context element of the HTML5 fragment-case algorithm.
+//
+// FIXME: Member variables should be grouped into self-initializing structs to
+// minimize code duplication between these constructors.
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks, unsigned maximumDOMTreeDepth)
+    : m_framesetOk(true)
+    , m_fragmentContext(fragment, contextElement, scriptingPermission)
+    , m_document(fragment->document())
+    , m_tree(fragment, scriptingPermission, maximumDOMTreeDepth)
+    , m_reportErrors(false) // FIXME: Why not report errors in fragments?
+    , m_isPaused(false)
+    , m_insertionMode(InitialMode)
+    , m_originalInsertionMode(InitialMode)
+    , m_shouldSkipLeadingNewline(false)
+    , m_parser(parser)
+    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
+    , m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
+    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
+{
+    // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed.
+    ASSERT(contextElement);
+    if (!contextElement)
+        return;
+
+    // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
+    // Step 4.2 ("Let root be a new html element with no attributes") is
+    // skipped for efficiency; the DocumentFragment itself serves as the root.
+    m_tree.openElements()->pushRootNode(fragment);
+    resetInsertionModeAppropriately();
+    m_tree.setForm(closestFormAncestor(contextElement));
+}
+
+HTMLTreeBuilder::~HTMLTreeBuilder() // Intentionally empty: no explicit teardown is performed here.
+{
+}
+
+void HTMLTreeBuilder::detach() // Drops the document pointer and detaches the construction site.
+{
+ // Detaching makes little sense in fragment mode, but DocumentParser expects
+ // detach() to be called before destruction in every case, for consistency.
+ m_document = 0;
+ // HTMLConstructionSite might still be on the call stack when detach() runs;
+ // otherwise we would simply call m_tree.clear() here instead.
+ m_tree.detach();
+}
+
+HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext() // Default context: no fragment and no context element.
+ : m_fragment(0)
+ , m_contextElement(0)
+ , m_scriptingPermission(FragmentScriptingAllowed) // Scripting defaults to "allowed".
+{
+}
+
+HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission) // Records the three fragment-parsing inputs as given.
+ : m_fragment(fragment)
+ , m_contextElement(contextElement)
+ , m_scriptingPermission(scriptingPermission)
+{
+ ASSERT(!fragment->hasChildNodes()); // The target fragment must start out empty; |fragment| is dereferenced, so it must be non-null.
+}
+
+HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext() // Intentionally empty: no explicit teardown is performed here.
+{
+}
+
+// Transfers the pending script element to the caller, reporting where it
+// started through |scriptStartPosition|, and unpauses the builder.
+PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
+{
+    // The HTML5 spec is written as though scripts run inside the tree
+    // builder. Instead, we pause the parser to leave the tree builder and
+    // resume just before running the script, so unpause here. Callers may
+    // pause us again while they process the script.
+    m_isPaused = false;
+
+    // Report the recorded start position, then put the placeholder back.
+    scriptStartPosition = m_scriptToProcessStartPosition;
+    m_scriptToProcessStartPosition = uninitializedPositionValue1();
+
+    RefPtr<Element> pendingScript = m_scriptToProcess.release();
+    return pendingScript.release();
+}
+
+// Converts |rawToken| into an AtomicHTMLToken, feeds it to the tree builder
+// and leaves |rawToken| cleared on return.
+void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
+{
+    AtomicHTMLToken token(rawToken);
+
+    // Clear the rawToken up front in case constructTreeFromAtomicToken
+    // synchronously re-enters the parser. Character tokens are the
+    // exception: the AtomicHTMLToken avoids copying their characters by
+    // pointing into the HTMLToken's buffer, so that buffer must stay alive
+    // until the token has been processed. Fortunately, Character tokens
+    // can't cause us to re-enter the parser.
+    //
+    // FIXME: Stop clearing the rawToken once we start running the parser off
+    // the main thread or once we stop allowing synchronous JavaScript
+    // execution from parseMappedAttribute.
+    const bool isCharacterToken = rawToken.type() == HTMLTokenTypes::Character;
+    if (!isCharacterToken)
+        rawToken.clear();
+
+    constructTreeFromAtomicToken(token);
+
+    // Anything still initialized at this point must be the deferred
+    // Character token; clear it now.
+    if (rawToken.isUninitialized())
+        return;
+    ASSERT(rawToken.type() == HTMLTokenTypes::Character);
+    rawToken.clear();
+}
+
+// Dispatches |token| to the HTML or foreign-content rules, updates the
+// tokenizer flags that depend on the resulting current node, and then runs
+// the construction site's queued tasks.
+void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
+{
+    if (!shouldProcessTokenInForeignContent(token))
+        processToken(token);
+    else
+        processTokenInForeignContent(token);
+
+    // The current node counts as foreign content when it is outside the HTML
+    // namespace and is neither kind of integration point.
+    bool currentNodeIsForeign = false;
+    if (!m_tree.isEmpty()) {
+        currentNodeIsForeign = !(isInHTMLNamespace(m_tree.currentNode())
+            || HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentNode())
+            || HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentNode()));
+    }
+
+    const bool forceNullReplacement = m_insertionMode == TextMode || currentNodeIsForeign;
+    m_parser->tokenizer()->setForceNullCharacterReplacement(forceNullReplacement);
+    m_parser->tokenizer()->setShouldAllowCDATA(currentNodeIsForeign);
+
+    m_tree.executeQueuedTasks();
+    // We might be detached now.
+}
+
+// Routes |token| to the handler for its type. Every handled type except
+// Character first clears m_shouldSkipLeadingNewline.
+void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
+{
+    switch (token.type()) {
+    case HTMLTokenTypes::Character:
+        // Character tokens leave m_shouldSkipLeadingNewline untouched here.
+        processCharacter(token);
+        return;
+    case HTMLTokenTypes::Uninitialized:
+        ASSERT_NOT_REACHED();
+        return;
+    case HTMLTokenTypes::DOCTYPE:
+        m_shouldSkipLeadingNewline = false;
+        processDoctypeToken(token);
+        return;
+    case HTMLTokenTypes::StartTag:
+        m_shouldSkipLeadingNewline = false;
+        processStartTag(token);
+        return;
+    case HTMLTokenTypes::EndTag:
+        m_shouldSkipLeadingNewline = false;
+        processEndTag(token);
+        return;
+    case HTMLTokenTypes::Comment:
+        m_shouldSkipLeadingNewline = false;
+        processComment(token);
+        return;
+    case HTMLTokenTypes::EndOfFile:
+        m_shouldSkipLeadingNewline = false;
+        processEndOfFile(token);
+        return;
+    }
+}
+
+// Handles a DOCTYPE token: it is inserted only in InitialMode; in
+// InTableTextMode the pending table text is dealt with first and the token
+// is processed again; in every other mode it is a parse error.
+void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::DOCTYPE);
+    switch (m_insertionMode) {
+    case InitialMode:
+        m_tree.insertDoctype(token);
+        setInsertionMode(BeforeHTMLMode);
+        return;
+    case InTableTextMode:
+        defaultForInTableText();
+        processDoctypeToken(token);
+        return;
+    default:
+        parseError(token);
+        return;
+    }
+}
+
+// Synthesizes a start tag named after |tagName|'s local name, carrying
+// |attributes|, and processes it like a real start tag.
+void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassOwnPtr<NamedNodeMap> attributes)
+{
+    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
+    AtomicHTMLToken syntheticStartTag(HTMLTokenTypes::StartTag, tagName.localName(), attributes);
+    processStartTag(syntheticStartTag);
+}
+
+// Synthesizes an end tag named after |tagName|'s local name and processes
+// it like a real end tag.
+void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
+{
+    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
+    AtomicHTMLToken syntheticEndTag(HTMLTokenTypes::EndTag, tagName.localName());
+    processEndTag(syntheticEndTag);
+}
+
+// Feeds |characters| through the character-buffer path as if they had come
+// from a character token. |characters| must not be empty.
+void HTMLTreeBuilder::processFakeCharacters(const String& characters)
+{
+    ASSERT(!characters.isEmpty());
+    ExternalCharacterTokenBuffer syntheticCharacters(characters);
+    processCharacterBuffer(syntheticCharacters);
+}
+
+// Processes a synthesized </p> when, and only when, a p element is in
+// button scope on the stack of open elements.
+void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
+{
+    const bool hasPInButtonScope = m_tree.openElements()->inButtonScope(pTag.localName());
+    if (hasPInButtonScope)
+        processFakeEndTag(pTag);
+}
+
+// Takes the attribute map from an <isindex> |token| and turns it into the
+// map for the synthesized <input>: any name, action and prompt attributes
+// are removed, and a name attribute whose value is the isindex local name
+// is inserted. A fresh map is created when the token has none.
+PassOwnPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
+{
+    OwnPtr<NamedNodeMap> inputAttributes = token.takeAttributes();
+    if (inputAttributes) {
+        inputAttributes->removeAttribute(nameAttr);
+        inputAttributes->removeAttribute(actionAttr);
+        inputAttributes->removeAttribute(promptAttr);
+    } else {
+        inputAttributes = NamedNodeMap::create();
+    }
+
+    RefPtr<Attribute> nameAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
+    inputAttributes->insertAttribute(nameAttribute.release(), false);
+    return inputAttributes.release();
+}
+
+// Expands an <isindex> start tag seen in the "in body" insertion mode into
+// the synthesized sequence
+//   <form> <hr> <label> prompt-text <input name=isindex ...> </label> <hr> </form>
+// The token is always a parse error, and it is ignored altogether when a
+// form element pointer is already set.
+//
+// The prompt text comes from the token's prompt attribute when present and
+// from searchableIndexIntroduction() otherwise. An empty prompt text inserts
+// no characters: processFakeCharacters() requires a non-empty string, so
+// passing it prompt="" would trip its assertion.
+void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::StartTag);
+    ASSERT(token.name() == isindexTag);
+    parseError(token);
+    if (m_tree.form())
+        return;
+    notImplemented(); // Acknowledge self-closing flag
+    processFakeStartTag(formTag);
+    RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
+    if (actionAttribute) {
+        // Copy the action attribute onto the form that was just opened.
+        ASSERT(m_tree.currentElement()->hasTagName(formTag));
+        m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
+    }
+    processFakeStartTag(hrTag);
+    processFakeStartTag(labelTag);
+
+    // Read the prompt before attributesForIsindexInput() takes the token's
+    // attribute map below.
+    RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
+    String promptText;
+    if (promptAttribute)
+        promptText = promptAttribute->value().string();
+    else
+        promptText = searchableIndexIntroduction();
+    if (!promptText.isEmpty())
+        processFakeCharacters(promptText);
+
+    processFakeStartTag(inputTag, attributesForIsindexInput(token));
+    notImplemented(); // This second set of characters may be needed by non-english locales.
+    processFakeEndTag(labelTag);
+    processFakeStartTag(hrTag);
+    processFakeEndTag(formTag);
+}
+
+namespace {
+
+// Predicate for processCloseWhenNestedTag: true when |element| is an li.
+bool isLi(const ContainerNode* element)
+{
+    const bool matchesLi = element->hasTagName(liTag);
+    return matchesLi;
+}
+
+// Predicate for processCloseWhenNestedTag: true when |element| is a dd or
+// a dt.
+bool isDdOrDt(const ContainerNode* element)
+{
+    if (element->hasTagName(ddTag))
+        return true;
+    return element->hasTagName(dtTag);
+}
+
+}
+
+// Shared start-tag handling for li and dd/dt. Walks the stack of open
+// elements from the top; the first node accepted by |shouldClose| is closed
+// with a synthesized end tag. The walk also stops at the first special node
+// other than address, div and p. Afterwards an open p in button scope is
+// closed and the element for |token| is inserted.
+template <bool shouldClose(const ContainerNode*)>
+void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
+{
+    m_framesetOk = false;
+
+    for (HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); ; record = record->next()) {
+        RefPtr<ContainerNode> openNode = record->node();
+        if (shouldClose(openNode.get())) {
+            ASSERT(openNode->isElementNode());
+            processFakeEndTag(toElement(openNode.get())->tagQName());
+            break;
+        }
+
+        const bool isTransparentToWalk = openNode->hasTagName(addressTag)
+            || openNode->hasTagName(divTag)
+            || openNode->hasTagName(pTag);
+        if (isSpecialNode(openNode.get()) && !isTransparentToWalk)
+            break;
+    }
+
+    processFakePEndTagIfPInButtonScope();
+    m_tree.insertHTMLElement(token);
+}
+
+namespace {
+
+// Lookup table from an attribute/tag name as the tokenizer delivers it to
+// the QualifiedName it should be rewritten to.
+typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
+
+// For each of the |length| entries in |names| whose local name differs from
+// its lowercased form, records lowercased-name -> original QualifiedName in
+// |map|. Names that are already all-lowercase are left out.
+void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
+{
+    QualifiedName** const namesEnd = names + length;
+    for (QualifiedName** cursor = names; cursor != namesEnd; ++cursor) {
+        const QualifiedName& qualifiedName = **cursor;
+        const AtomicString& casedLocalName = qualifiedName.localName();
+        const AtomicString lowercased = casedLocalName.lower();
+        if (lowercased == casedLocalName)
+            continue;
+        map->add(lowercased, qualifiedName);
+    }
+}
+
+// Rewrites |token|'s name to the mixed-case spelling of the matching SVG
+// tag, if there is one. The lookup table is built on first use and kept for
+// the life of the process.
+void adjustSVGTagNameCase(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* svgTagCaseMap = 0;
+    if (!svgTagCaseMap) {
+        size_t tagCount = 0;
+        QualifiedName** svgTags = SVGNames::getSVGTags(&tagCount);
+        svgTagCaseMap = new PrefixedNameToQualifiedNameMap;
+        mapLoweredLocalNameToName(svgTagCaseMap, svgTags, tagCount);
+    }
+
+    // A miss yields a name whose local name is null; leave the token alone.
+    const QualifiedName& properlyCased = svgTagCaseMap->get(token.name());
+    if (!properlyCased.localName().isNull())
+        token.setName(properlyCased.localName());
+}
+
+// Renames each attribute on |token| whose local name matches the lowercased
+// form of a mixed-case name returned by |getAttrs|. Each instantiation
+// builds its own lookup table on first use and keeps it.
+template<QualifiedName** getAttrs(size_t* length)>
+void adjustAttributes(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* attributeCaseMap = 0;
+    if (!attributeCaseMap) {
+        size_t attributeCount = 0;
+        QualifiedName** knownAttributes = getAttrs(&attributeCount);
+        attributeCaseMap = new PrefixedNameToQualifiedNameMap;
+        mapLoweredLocalNameToName(attributeCaseMap, knownAttributes, attributeCount);
+    }
+
+    NamedNodeMap* tokenAttributes = token.attributes();
+    if (!tokenAttributes)
+        return;
+
+    for (unsigned index = 0; index < tokenAttributes->length(); ++index) {
+        Attribute* current = tokenAttributes->attributeItem(index);
+        const QualifiedName& properlyCased = attributeCaseMap->get(current->localName());
+        if (properlyCased.localName().isNull())
+            continue; // No mixed-case spelling is known for this attribute.
+        current->parserSetName(properlyCased);
+    }
+}
+
+void adjustSVGAttributes(AtomicHTMLToken& token) // Renames |token|'s attributes to their mixed-case SVG spellings where one exists.
+{
+ adjustAttributes<SVGNames::getSVGAttrs>(token);
+}
+
+void adjustMathMLAttributes(AtomicHTMLToken& token) // Renames |token|'s attributes to their mixed-case MathML spellings where one exists.
+{
+ adjustAttributes<MathMLNames::getMathMLAttrs>(token);
+}
+
+// For each of the |length| entries in |names|, records
+// "<prefix>:<localName>" -> QualifiedName(prefix, localName, namespaceURI)
+// in |map|.
+void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
+{
+    for (size_t index = 0; index != length; ++index) {
+        const QualifiedName& unprefixedName = *names[index];
+        const AtomicString& local = unprefixedName.localName();
+        AtomicString lookupKey = prefix + ':' + local;
+        QualifiedName prefixedName(prefix, local, unprefixedName.namespaceURI());
+        map->add(lookupKey, prefixedName);
+    }
+}
+
+// Renames the xlink:*, xml:*, xmlns and xmlns:xlink attributes on |token|
+// to their namespaced QualifiedNames. The lookup table is built on first
+// use and kept for the life of the process.
+void adjustForeignAttributes(AtomicHTMLToken& token)
+{
+    static PrefixedNameToQualifiedNameMap* foreignAttributeMap = 0;
+    if (!foreignAttributeMap) {
+        foreignAttributeMap = new PrefixedNameToQualifiedNameMap;
+
+        size_t count = 0;
+        QualifiedName** xlinkAttributes = XLinkNames::getXLinkAttrs(&count);
+        addNamesWithPrefix(foreignAttributeMap, "xlink", xlinkAttributes, count);
+
+        QualifiedName** xmlAttributes = XMLNames::getXMLAttrs(&count);
+        addNamesWithPrefix(foreignAttributeMap, "xml", xmlAttributes, count);
+
+        foreignAttributeMap->add("xmlns", XMLNSNames::xmlnsAttr);
+        foreignAttributeMap->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
+    }
+
+    NamedNodeMap* tokenAttributes = token.attributes();
+    if (!tokenAttributes)
+        return;
+
+    for (unsigned index = 0; index < tokenAttributes->length(); ++index) {
+        Attribute* current = tokenAttributes->attributeItem(index);
+        const QualifiedName& namespacedName = foreignAttributeMap->get(current->localName());
+        if (namespacedName.localName().isNull())
+            continue; // Not one of the foreign attributes.
+        current->parserSetName(namespacedName);
+    }
+}
+
+}
+
+// Handles a start tag under the "in body" insertion-mode rules. The tag
+// name is tested against each group of names in turn; the first matching
+// group handles the token and returns. Names that match no group fall to
+// the generic case at the bottom: reconstruct the active formatting
+// elements and insert an ordinary HTML element.
+//
+// The one deliberate fall-through is <image>: it is renamed to img and then
+// handled by the void-element group that follows.
+void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::StartTag);
+    if (token.name() == htmlTag) {
+        m_tree.insertHTMLHtmlStartTagInBody(token);
+        return;
+    }
+    // Tags that are processed with the "in head" rules.
+    if (token.name() == baseTag
+        || token.name() == basefontTag
+        || token.name() == bgsoundTag
+        || token.name() == commandTag
+        || token.name() == linkTag
+        || token.name() == metaTag
+        || token.name() == noframesTag
+        || token.name() == scriptTag
+        || token.name() == styleTag
+        || token.name() == titleTag) {
+        bool didProcess = processStartTagForInHead(token);
+        ASSERT_UNUSED(didProcess, didProcess);
+        return;
+    }
+    if (token.name() == bodyTag) {
+        // A <body> start tag in this mode is always a parse error, exactly
+        // like the <frameset> case below.
+        parseError(token);
+        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        m_framesetOk = false;
+        m_tree.insertHTMLBodyStartTagInBody(token);
+        return;
+    }
+    if (token.name() == framesetTag) {
+        parseError(token);
+        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        if (!m_framesetOk)
+            return;
+        // Replace the body with the frameset: detach the body from its
+        // parent, pop the stack back down to the html element, insert.
+        ExceptionCode ec = 0;
+        m_tree.openElements()->bodyElement()->remove(ec);
+        ASSERT(!ec);
+        m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
+        m_tree.openElements()->popHTMLBodyElement();
+        ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InFramesetMode);
+        return;
+    }
+    // Block-level containers: close an open p in button scope, then insert.
+    if (token.name() == addressTag
+        || token.name() == articleTag
+        || token.name() == asideTag
+        || token.name() == blockquoteTag
+        || token.name() == centerTag
+        || token.name() == detailsTag
+        || token.name() == dirTag
+        || token.name() == divTag
+        || token.name() == dlTag
+        || token.name() == fieldsetTag
+        || token.name() == figcaptionTag
+        || token.name() == figureTag
+        || token.name() == footerTag
+        || token.name() == headerTag
+        || token.name() == hgroupTag
+        || token.name() == menuTag
+        || token.name() == navTag
+        || token.name() == olTag
+        || token.name() == pTag
+        || token.name() == sectionTag
+        || token.name() == summaryTag
+        || token.name() == ulTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    if (isNumberedHeaderTag(token.name())) {
+        processFakePEndTagIfPInButtonScope();
+        // Headings do not nest: pop a heading that is the current node.
+        if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
+            parseError(token);
+            m_tree.openElements()->pop();
+        }
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    if (token.name() == preTag || token.name() == listingTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLElement(token);
+        m_shouldSkipLeadingNewline = true;
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == formTag) {
+        // Only one form element pointer at a time; a second <form> is ignored.
+        if (m_tree.form()) {
+            parseError(token);
+            return;
+        }
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLFormElement(token);
+        return;
+    }
+    if (token.name() == liTag) {
+        processCloseWhenNestedTag<isLi>(token);
+        return;
+    }
+    if (token.name() == ddTag || token.name() == dtTag) {
+        processCloseWhenNestedTag<isDdOrDt>(token);
+        return;
+    }
+    if (token.name() == plaintextTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertHTMLElement(token);
+        m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+        return;
+    }
+    if (token.name() == buttonTag) {
+        if (m_tree.openElements()->inScope(buttonTag)) {
+            // Close the open button first, then handle this token again.
+            parseError(token);
+            processFakeEndTag(buttonTag);
+            processStartTag(token); // FIXME: Could we just fall through here?
+            return;
+        }
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == aTag) {
+        Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
+        if (activeATag) {
+            // An <a> is still active: close it and make sure it is gone
+            // from both the formatting list and the stack of open elements.
+            parseError(token);
+            processFakeEndTag(aTag);
+            m_tree.activeFormattingElements()->remove(activeATag);
+            if (m_tree.openElements()->contains(activeATag))
+                m_tree.openElements()->remove(activeATag);
+        }
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertFormattingElement(token);
+        return;
+    }
+    if (isNonAnchorNonNobrFormattingTag(token.name())) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertFormattingElement(token);
+        return;
+    }
+    if (token.name() == nobrTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        if (m_tree.openElements()->inScope(nobrTag)) {
+            parseError(token);
+            processFakeEndTag(nobrTag);
+            m_tree.reconstructTheActiveFormattingElements();
+        }
+        m_tree.insertFormattingElement(token);
+        return;
+    }
+    if (token.name() == appletTag
+        || token.name() == marqueeTag
+        || token.name() == objectTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        m_tree.activeFormattingElements()->appendMarker();
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == tableTag) {
+        // Outside quirks mode a table closes an open p in button scope.
+        if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
+            processFakeEndTag(pTag);
+        m_tree.insertHTMLElement(token);
+        m_framesetOk = false;
+        setInsertionMode(InTableMode);
+        return;
+    }
+    if (token.name() == imageTag) {
+        parseError(token);
+        // Apparently we're not supposed to ask.
+        token.setName(imgTag.localName());
+        // Note the fall through to the imgTag handling below!
+    }
+    if (token.name() == areaTag
+        || token.name() == brTag
+        || token.name() == embedTag
+        || token.name() == imgTag
+        || token.name() == keygenTag
+        || token.name() == wbrTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertSelfClosingHTMLElement(token);
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == inputTag) {
+        // Look the type attribute up before the token is handed to the
+        // construction site.
+        RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertSelfClosingHTMLElement(token);
+        // Only <input type=hidden> leaves the frameset-ok flag alone.
+        if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
+            m_framesetOk = false;
+        return;
+    }
+    if (token.name() == paramTag
+        || token.name() == sourceTag
+        || token.name() == trackTag) {
+        m_tree.insertSelfClosingHTMLElement(token);
+        return;
+    }
+    if (token.name() == hrTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.insertSelfClosingHTMLElement(token);
+        m_framesetOk = false;
+        return;
+    }
+    if (token.name() == isindexTag) {
+        processIsindexStartTagForInBody(token);
+        return;
+    }
+    if (token.name() == textareaTag) {
+        m_tree.insertHTMLElement(token);
+        m_shouldSkipLeadingNewline = true;
+        m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+        m_originalInsertionMode = m_insertionMode;
+        m_framesetOk = false;
+        setInsertionMode(TextMode);
+        return;
+    }
+    if (token.name() == xmpTag) {
+        processFakePEndTagIfPInButtonScope();
+        m_tree.reconstructTheActiveFormattingElements();
+        m_framesetOk = false;
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    if (token.name() == iframeTag) {
+        m_framesetOk = false;
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    // noembed / noscript are raw text only while the corresponding feature
+    // is enabled; otherwise they fall through to the later cases.
+    if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
+        processGenericRawTextStartTag(token);
+        return;
+    }
+    if (token.name() == selectTag) {
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        m_framesetOk = false;
+        // Inside any table-related mode the select gets its own mode.
+        if (m_insertionMode == InTableMode
+            || m_insertionMode == InCaptionMode
+            || m_insertionMode == InColumnGroupMode
+            || m_insertionMode == InTableBodyMode
+            || m_insertionMode == InRowMode
+            || m_insertionMode == InCellMode)
+            setInsertionMode(InSelectInTableMode);
+        else
+            setInsertionMode(InSelectMode);
+        return;
+    }
+    if (token.name() == optgroupTag || token.name() == optionTag) {
+        // An option that is the current node is closed first.
+        if (m_tree.currentNode()->hasTagName(optionTag)) {
+            AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
+            processEndTag(endOption);
+        }
+        m_tree.reconstructTheActiveFormattingElements();
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    if (token.name() == rpTag || token.name() == rtTag) {
+        if (m_tree.openElements()->inScope(rubyTag.localName())) {
+            m_tree.generateImpliedEndTags();
+            if (!m_tree.currentNode()->hasTagName(rubyTag))
+                parseError(token);
+        }
+        m_tree.insertHTMLElement(token);
+        return;
+    }
+    // Entry points into foreign content: adjust attribute names, then
+    // insert the element in the MathML or SVG namespace.
+    if (token.name() == MathMLNames::mathTag.localName()) {
+        m_tree.reconstructTheActiveFormattingElements();
+        adjustMathMLAttributes(token);
+        adjustForeignAttributes(token);
+        m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
+        return;
+    }
+    if (token.name() == SVGNames::svgTag.localName()) {
+        m_tree.reconstructTheActiveFormattingElements();
+        adjustSVGAttributes(token);
+        adjustForeignAttributes(token);
+        m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
+        return;
+    }
+    // Table-structure, frame and head tags are parse errors here and are
+    // ignored.
+    if (isCaptionColOrColgroupTag(token.name())
+        || token.name() == frameTag
+        || token.name() == headTag
+        || isTableBodyContextTag(token.name())
+        || isTableCellContextTag(token.name())
+        || token.name() == trTag) {
+        parseError(token);
+        return;
+    }
+    // Any other start tag becomes an ordinary HTML element.
+    m_tree.reconstructTheActiveFormattingElements();
+    m_tree.insertHTMLElement(token);
+}
+
+// Handles a </colgroup> end tag for the InColumnGroupMode insertion mode.
+// Returns true when the current node was popped and the insertion mode was
+// switched to InTableMode. Returns false (leaving the stack untouched) when
+// the current node is the root node of the open element stack.
+bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
+{
+    const bool currentNodeIsRoot = m_tree.currentNode() == m_tree.openElements()->rootNode();
+    if (!currentNodeIsRoot) {
+        m_tree.openElements()->pop();
+        setInsertionMode(InTableMode);
+        return true;
+    }
+    // The code asserts that this situation only arises for fragment parsing.
+    ASSERT(isParsingFragment());
+    // FIXME: parse error
+    return false;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
+// Closes the open table cell by dispatching a fake </td> or </th> end tag,
+// depending on which of the two is in table scope. Must be called in
+// InCellMode; the fake end tag is handled by processEndTagForInCell(), which
+// switches to InRowMode once the cell has been popped, so that postcondition
+// is asserted for both the td and the th path.
+void HTMLTreeBuilder::closeTheCell()
+{
+    ASSERT(insertionMode() == InCellMode);
+    if (m_tree.openElements()->inTableScope(tdTag)) {
+        ASSERT(!m_tree.openElements()->inTableScope(thTag));
+        processFakeEndTag(tdTag);
+    } else {
+        ASSERT(m_tree.openElements()->inTableScope(thTag));
+        processFakeEndTag(thTag);
+    }
+    ASSERT(insertionMode() == InRowMode);
+}
+
+// Handles a start tag using the table rules (called for InTableMode, and as
+// the fallback for InTableBodyMode and InRowMode). Table-structure tags are
+// inserted directly or via a fake parent start tag; anything not handled
+// explicitly is reported as a parse error and processed with the in-body
+// rules while insertions are redirected to the foster parent.
+void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::StartTag);
+    if (token.name() == captionTag) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.activeFormattingElements()->appendMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InCaptionMode);
+        return;
+    }
+    if (token.name() == colgroupTag) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InColumnGroupMode);
+        return;
+    }
+    if (token.name() == colTag) {
+        // A bare <col> implies a <colgroup>; the fake start tag is handled by
+        // the colgroup branch above, which switches to InColumnGroupMode.
+        processFakeStartTag(colgroupTag);
+        ASSERT(insertionMode() == InColumnGroupMode);
+        processStartTag(token);
+        return;
+    }
+    if (isTableBodyContextTag(token.name())) {
+        m_tree.openElements()->popUntilTableScopeMarker();
+        m_tree.insertHTMLElement(token);
+        setInsertionMode(InTableBodyMode);
+        return;
+    }
+    if (isTableCellContextTag(token.name())
+        || token.name() == trTag) {
+        // Cells and rows imply a <tbody>.
+        processFakeStartTag(tbodyTag);
+        ASSERT(insertionMode() == InTableBodyMode);
+        processStartTag(token);
+        return;
+    }
+    if (token.name() == tableTag) {
+        // A nested <table> start tag first closes the current table, then is
+        // reprocessed in whatever mode that leaves us in.
+        parseError(token);
+        if (!processTableEndTagForInTable()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        processStartTag(token);
+        return;
+    }
+    if (token.name() == styleTag || token.name() == scriptTag) {
+        processStartTagForInHead(token);
+        return;
+    }
+    if (token.name() == inputTag) {
+        Attribute* typeAttribute = token.getAttributeItem(typeAttr);
+        if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
+            parseError(token);
+            m_tree.insertSelfClosingHTMLElement(token);
+            return;
+        }
+        // Fall through to "anything else" case.
+    }
+    if (token.name() == formTag) {
+        parseError(token);
+        if (m_tree.form())
+            return;
+        // The form element is inserted and immediately popped again.
+        m_tree.insertHTMLFormElement(token, true);
+        m_tree.openElements()->pop();
+        return;
+    }
+    // Anything else: parse error, then in-body processing with foster parenting.
+    parseError(token);
+    HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
+    processStartTagForInBody(token);
+}
+
+// Dispatches a start tag token according to the current insertion mode.
+// The early modes (Initial through AfterHead) are chained with deliberate
+// fall-through: when a mode does not consume the token it runs its
+// defaultFor*() step, which advances the insertion mode, and the token is
+// then examined by the next case. Other modes either consume the token,
+// delegate to a per-mode helper, or adjust state and reprocess the token by
+// calling processStartTag()/processToken() again.
+void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::StartTag);
+    switch (insertionMode()) {
+    case InitialMode:
+        ASSERT(insertionMode() == InitialMode);
+        defaultForInitial();
+        // Fall through.
+    case BeforeHTMLMode:
+        ASSERT(insertionMode() == BeforeHTMLMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
+            setInsertionMode(BeforeHeadMode);
+            return;
+        }
+        defaultForBeforeHTML();
+        // Fall through.
+    case BeforeHeadMode:
+        ASSERT(insertionMode() == BeforeHeadMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == headTag) {
+            m_tree.insertHTMLHeadElement(token);
+            setInsertionMode(InHeadMode);
+            return;
+        }
+        defaultForBeforeHead();
+        // Fall through.
+    case InHeadMode:
+        ASSERT(insertionMode() == InHeadMode);
+        if (processStartTagForInHead(token))
+            return;
+        defaultForInHead();
+        // Fall through.
+    case AfterHeadMode:
+        ASSERT(insertionMode() == AfterHeadMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == bodyTag) {
+            m_framesetOk = false;
+            m_tree.insertHTMLBodyElement(token);
+            setInsertionMode(InBodyMode);
+            return;
+        }
+        if (token.name() == framesetTag) {
+            m_tree.insertHTMLElement(token);
+            setInsertionMode(InFramesetMode);
+            return;
+        }
+        if (token.name() == baseTag
+            || token.name() == basefontTag
+            || token.name() == bgsoundTag
+            || token.name() == linkTag
+            || token.name() == metaTag
+            || token.name() == noframesTag
+            || token.name() == scriptTag
+            || token.name() == styleTag
+            || token.name() == titleTag) {
+            // Head-only content after </head>: temporarily put the head
+            // element back on the stack so the in-head rules insert into it.
+            parseError(token);
+            ASSERT(m_tree.head());
+            m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
+            processStartTagForInHead(token);
+            m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
+            return;
+        }
+        if (token.name() == headTag) {
+            parseError(token);
+            return;
+        }
+        defaultForAfterHead();
+        // Fall through
+    case InBodyMode:
+        ASSERT(insertionMode() == InBodyMode);
+        processStartTagForInBody(token);
+        break;
+    case InTableMode:
+        ASSERT(insertionMode() == InTableMode);
+        processStartTagForInTable(token);
+        break;
+    case InCaptionMode:
+        ASSERT(insertionMode() == InCaptionMode);
+        if (isCaptionColOrColgroupTag(token.name())
+            || isTableBodyContextTag(token.name())
+            || isTableCellContextTag(token.name())
+            || token.name() == trTag) {
+            // Table-structure tags implicitly close the caption first.
+            parseError(token);
+            if (!processCaptionEndTagForInCaption()) {
+                ASSERT(isParsingFragment());
+                return;
+            }
+            processStartTag(token);
+            return;
+        }
+        processStartTagForInBody(token);
+        break;
+    case InColumnGroupMode:
+        ASSERT(insertionMode() == InColumnGroupMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == colTag) {
+            m_tree.insertSelfClosingHTMLElement(token);
+            return;
+        }
+        // Anything else implicitly closes the colgroup and is reprocessed.
+        if (!processColgroupEndTagForInColumnGroup()) {
+            ASSERT(isParsingFragment());
+            return;
+        }
+        processStartTag(token);
+        break;
+    case InTableBodyMode:
+        ASSERT(insertionMode() == InTableBodyMode);
+        if (token.name() == trTag) {
+            m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
+            m_tree.insertHTMLElement(token);
+            setInsertionMode(InRowMode);
+            return;
+        }
+        if (isTableCellContextTag(token.name())) {
+            // A cell directly inside a table section implies a <tr>.
+            parseError(token);
+            processFakeStartTag(trTag);
+            ASSERT(insertionMode() == InRowMode);
+            processStartTag(token);
+            return;
+        }
+        if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
+            // FIXME: This is slow.
+            if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
+                ASSERT(isParsingFragment());
+                parseError(token);
+                return;
+            }
+            // Close the current table section, then reprocess the token.
+            m_tree.openElements()->popUntilTableBodyScopeMarker();
+            ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
+            processFakeEndTag(m_tree.currentElement()->tagQName());
+            processStartTag(token);
+            return;
+        }
+        processStartTagForInTable(token);
+        break;
+    case InRowMode:
+        ASSERT(insertionMode() == InRowMode);
+        if (isTableCellContextTag(token.name())) {
+            m_tree.openElements()->popUntilTableRowScopeMarker();
+            m_tree.insertHTMLElement(token);
+            setInsertionMode(InCellMode);
+            m_tree.activeFormattingElements()->appendMarker();
+            return;
+        }
+        if (token.name() == trTag
+            || isCaptionColOrColgroupTag(token.name())
+            || isTableBodyContextTag(token.name())) {
+            // Close the current row, then reprocess the token.
+            if (!processTrEndTagForInRow()) {
+                ASSERT(isParsingFragment());
+                return;
+            }
+            ASSERT(insertionMode() == InTableBodyMode);
+            processStartTag(token);
+            return;
+        }
+        processStartTagForInTable(token);
+        break;
+    case InCellMode:
+        ASSERT(insertionMode() == InCellMode);
+        if (isCaptionColOrColgroupTag(token.name())
+            || isTableCellContextTag(token.name())
+            || token.name() == trTag
+            || isTableBodyContextTag(token.name())) {
+            // FIXME: This could be more efficient.
+            if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
+                ASSERT(isParsingFragment());
+                parseError(token);
+                return;
+            }
+            closeTheCell();
+            processStartTag(token);
+            return;
+        }
+        processStartTagForInBody(token);
+        break;
+    case AfterBodyMode:
+    case AfterAfterBodyMode:
+        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        // Any other start tag switches back to InBodyMode and is reprocessed.
+        setInsertionMode(InBodyMode);
+        processStartTag(token);
+        break;
+    case InHeadNoscriptMode:
+        ASSERT(insertionMode() == InHeadNoscriptMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == basefontTag
+            || token.name() == bgsoundTag
+            || token.name() == linkTag
+            || token.name() == metaTag
+            || token.name() == noframesTag
+            || token.name() == styleTag) {
+            bool didProcess = processStartTagForInHead(token);
+            ASSERT_UNUSED(didProcess, didProcess);
+            return;
+        }
+        // <head> and <noscript> start tags are parse errors and are ignored
+        // in this mode. (<html> has already been handled at the top of this
+        // case, so it must not be tested again here.)
+        if (token.name() == headTag || token.name() == noscriptTag) {
+            parseError(token);
+            return;
+        }
+        defaultForInHeadNoscript();
+        processToken(token);
+        break;
+    case InFramesetMode:
+        ASSERT(insertionMode() == InFramesetMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == framesetTag) {
+            m_tree.insertHTMLElement(token);
+            return;
+        }
+        if (token.name() == frameTag) {
+            m_tree.insertSelfClosingHTMLElement(token);
+            return;
+        }
+        if (token.name() == noframesTag) {
+            processStartTagForInHead(token);
+            return;
+        }
+        parseError(token);
+        break;
+    case AfterFramesetMode:
+    case AfterAfterFramesetMode:
+        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == noframesTag) {
+            processStartTagForInHead(token);
+            return;
+        }
+        parseError(token);
+        break;
+    case InSelectInTableMode:
+        ASSERT(insertionMode() == InSelectInTableMode);
+        if (token.name() == captionTag
+            || token.name() == tableTag
+            || isTableBodyContextTag(token.name())
+            || token.name() == trTag
+            || isTableCellContextTag(token.name())) {
+            // Table-structure tags close the select, then are reprocessed.
+            parseError(token);
+            AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
+            processEndTag(endSelect);
+            processStartTag(token);
+            return;
+        }
+        // Fall through
+    case InSelectMode:
+        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+        if (token.name() == htmlTag) {
+            m_tree.insertHTMLHtmlStartTagInBody(token);
+            return;
+        }
+        if (token.name() == optionTag) {
+            // A new <option> closes a currently open <option>.
+            if (m_tree.currentNode()->hasTagName(optionTag)) {
+                AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
+                processEndTag(endOption);
+            }
+            m_tree.insertHTMLElement(token);
+            return;
+        }
+        if (token.name() == optgroupTag) {
+            // A new <optgroup> closes an open <option> and then an open <optgroup>.
+            if (m_tree.currentNode()->hasTagName(optionTag)) {
+                AtomicHTMLToken endOption(HTMLTokenTypes::EndTag, optionTag.localName());
+                processEndTag(endOption);
+            }
+            if (m_tree.currentNode()->hasTagName(optgroupTag)) {
+                AtomicHTMLToken endOptgroup(HTMLTokenTypes::EndTag, optgroupTag.localName());
+                processEndTag(endOptgroup);
+            }
+            m_tree.insertHTMLElement(token);
+            return;
+        }
+        if (token.name() == selectTag) {
+            // A nested <select> start tag is treated as </select>.
+            parseError(token);
+            AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
+            processEndTag(endSelect);
+            return;
+        }
+        if (token.name() == inputTag
+            || token.name() == keygenTag
+            || token.name() == textareaTag) {
+            parseError(token);
+            if (!m_tree.openElements()->inSelectScope(selectTag)) {
+                ASSERT(isParsingFragment());
+                return;
+            }
+            AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
+            processEndTag(endSelect);
+            processStartTag(token);
+            return;
+        }
+        if (token.name() == scriptTag) {
+            bool didProcess = processStartTagForInHead(token);
+            ASSERT_UNUSED(didProcess, didProcess);
+            return;
+        }
+        break;
+    case InTableTextMode:
+        defaultForInTableText();
+        processStartTag(token);
+        break;
+    case TextMode:
+        ASSERT_NOT_REACHED();
+        break;
+    }
+}
+
+// Handles a </body> end tag with the in-body rules. When a body element is in
+// scope the insertion mode becomes AfterBodyMode and true is returned;
+// otherwise a parse error is reported and false is returned.
+bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::EndTag);
+    ASSERT(token.name() == bodyTag);
+    const bool bodyIsInScope = m_tree.openElements()->inScope(bodyTag.localName());
+    if (bodyIsInScope) {
+        notImplemented(); // Emit a more specific parse error based on stack contents.
+        setInsertionMode(AfterBodyMode);
+        return true;
+    }
+    parseError(token);
+    return false;
+}
+
+// Handles an end tag that has no dedicated in-body rule. Walks the stack of
+// open elements from the top looking for an element whose local name matches
+// the token. On a match, implied end tags are generated and the stack is
+// popped through that element. If a special node is reached first, the token
+// is a parse error and is ignored.
+void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::EndTag);
+    for (HTMLElementStack::ElementRecord* entry = m_tree.openElements()->topRecord(); ; entry = entry->next()) {
+        RefPtr<ContainerNode> candidate = entry->node();
+        if (!candidate->hasLocalName(token.name())) {
+            if (isSpecialNode(candidate.get())) {
+                parseError(token);
+                return;
+            }
+            continue;
+        }
+        m_tree.generateImpliedEndTags();
+        // FIXME: The ElementRecord pointed to by entry might be deleted by
+        // the preceding call. Perhaps we should hold a RefPtr so that it
+        // stays alive for the duration of entry's scope.
+        entry = 0;
+        const bool currentNodeMatches = m_tree.currentNode()->hasLocalName(token.name());
+        if (!currentNodeMatches) {
+            parseError(token);
+            // FIXME: This is either a bug in the spec, or a bug in our
+            // implementation. Filed a bug with HTML5:
+            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
+            // We might have already popped the node for the token in
+            // generateImpliedEndTags, just abort.
+            if (!m_tree.openElements()->contains(toElement(candidate.get())))
+                return;
+        }
+        m_tree.openElements()->popUntilPopped(toElement(candidate.get()));
+        return;
+    }
+}
+
+// FIXME: This probably belongs on HTMLElementStack.
+// Scans the stack of open elements from the top down to formattingElement and
+// returns the record of the last special node seen before reaching it (the
+// special node nearest to formattingElement), or 0 if none was seen.
+// formattingElement is expected to be on the stack.
+HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
+{
+    HTMLElementStack::ElementRecord* lastSpecialRecord = 0;
+    HTMLElementStack::ElementRecord* cursor = m_tree.openElements()->topRecord();
+    while (cursor) {
+        if (cursor->element() == formattingElement)
+            return lastSpecialRecord;
+        if (isSpecialNode(cursor->element()))
+            lastSpecialRecord = cursor;
+        cursor = cursor->next();
+    }
+    ASSERT_NOT_REACHED();
+    return 0;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+// Runs the adoption agency algorithm for a formatting end tag. The numbered
+// comments below follow the steps of the algorithm referenced above. Each
+// outer iteration locates the formatting element for the token, finds its
+// furthest block, re-parents the nodes between them, and replaces the
+// formatting element with a fresh clone placed inside the furthest block.
+void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
+{
+    // The adoption agency algorithm is N^2. We limit the number of iterations
+    // to stop from hanging the whole browser. This limit is specified in the
+    // adoption agency algorithm:
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
+    static const int outerIterationLimit = 8;
+    static const int innerIterationLimit = 3;
+
+    // The two loop counters have distinct names so that the inner counter
+    // does not shadow the outer one.
+    for (int outerIteration = 0; outerIteration < outerIterationLimit; ++outerIteration) {
+        // 1.
+        Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
+        if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
+            parseError(token);
+            notImplemented(); // Check the stack of open elements for a more specific parse error.
+            return;
+        }
+        HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
+        if (!formattingElementRecord) {
+            // In the active formatting list but no longer on the stack.
+            parseError(token);
+            m_tree.activeFormattingElements()->remove(formattingElement);
+            return;
+        }
+        if (formattingElement != m_tree.currentElement())
+            parseError(token);
+        // 2.
+        HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
+        // 3.
+        if (!furthestBlock) {
+            m_tree.openElements()->popUntilPopped(formattingElement);
+            m_tree.activeFormattingElements()->remove(formattingElement);
+            return;
+        }
+        // 4.
+        ASSERT(furthestBlock->isAbove(formattingElementRecord));
+        RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
+        // 5.
+        HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
+        // 6.
+        HTMLElementStack::ElementRecord* node = furthestBlock;
+        HTMLElementStack::ElementRecord* nextNode = node->next();
+        HTMLElementStack::ElementRecord* lastNode = furthestBlock;
+        for (int innerIteration = 0; innerIteration < innerIterationLimit; ++innerIteration) {
+            // 6.1
+            node = nextNode;
+            ASSERT(node);
+            nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
+            // 6.2
+            if (!m_tree.activeFormattingElements()->contains(node->element())) {
+                m_tree.openElements()->remove(node->element());
+                node = 0;
+                continue;
+            }
+            // 6.3
+            if (node == formattingElementRecord)
+                break;
+            // 6.5
+            RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
+            HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
+            nodeEntry->replaceElement(newElement.get());
+            node->replaceElement(newElement.release());
+            // 6.4 -- Intentionally out of order to handle the case where node
+            // was replaced in 6.5.
+            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
+            if (lastNode == furthestBlock)
+                bookmark.moveToAfter(nodeEntry);
+            // 6.6
+            if (ContainerNode* parent = lastNode->element()->parentNode())
+                parent->parserRemoveChild(lastNode->element());
+            node->element()->parserAddChild(lastNode->element());
+            if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
+                lastNode->element()->lazyAttach();
+            // 6.7
+            lastNode = node;
+        }
+        // 7
+        const AtomicString& commonAncestorTag = commonAncestor->localName();
+        if (ContainerNode* parent = lastNode->element()->parentNode())
+            parent->parserRemoveChild(lastNode->element());
+        // FIXME: If this moves to HTMLConstructionSite, this check should use
+        // causesFosterParenting(tagName) instead.
+        if (commonAncestorTag == tableTag
+            || commonAncestorTag == trTag
+            || isTableBodyContextTag(commonAncestorTag))
+            m_tree.fosterParent(lastNode->element());
+        else {
+            commonAncestor->parserAddChild(lastNode->element());
+            ASSERT(lastNode->node()->isElementNode());
+            ASSERT(lastNode->element()->parentNode());
+            if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
+                lastNode->element()->lazyAttach();
+        }
+        // 8
+        RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
+        // 9
+        newElement->takeAllChildrenFrom(furthestBlock->element());
+        // 10
+        Element* furthestBlockElement = furthestBlock->element();
+        // FIXME: All this creation / parserAddChild / attach business should
+        // be in HTMLConstructionSite. My guess is that steps 8--12
+        // should all be in some HTMLConstructionSite function.
+        furthestBlockElement->parserAddChild(newElement);
+        if (furthestBlockElement->attached() && !newElement->attached()) {
+            // Notice that newElement might already be attached if, for example, one of the reparented
+            // children is a style element, which attaches itself automatically.
+            newElement->attach();
+        }
+        // 11
+        m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
+        // 12
+        m_tree.openElements()->remove(formattingElement);
+        m_tree.openElements()->insertAbove(newElement, furthestBlock);
+    }
+}
+
+// Chooses the insertion mode that matches the current stack of open elements.
+// The stack is walked from the top; the first element with a recognized tag
+// name decides the mode. When the root node of the stack is reached, the
+// fragment context element is examined in its place, and InBodyMode is used
+// if that element does not match either.
+void HTMLTreeBuilder::resetInsertionModeAppropriately()
+{
+    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
+    bool reachedRoot = false;
+    for (HTMLElementStack::ElementRecord* entry = m_tree.openElements()->topRecord(); ; entry = entry->next()) {
+        ContainerNode* node = entry->node();
+        if (node == m_tree.openElements()->rootNode()) {
+            ASSERT(isParsingFragment());
+            reachedRoot = true;
+            node = m_fragmentContext.contextElement();
+        }
+        if (node->hasTagName(selectTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InSelectMode);
+            return;
+        }
+        if (node->hasTagName(tdTag) || node->hasTagName(thTag)) {
+            setInsertionMode(InCellMode);
+            return;
+        }
+        if (node->hasTagName(trTag)) {
+            setInsertionMode(InRowMode);
+            return;
+        }
+        if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag)) {
+            setInsertionMode(InTableBodyMode);
+            return;
+        }
+        if (node->hasTagName(captionTag)) {
+            setInsertionMode(InCaptionMode);
+            return;
+        }
+        if (node->hasTagName(colgroupTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InColumnGroupMode);
+            return;
+        }
+        if (node->hasTagName(tableTag)) {
+            setInsertionMode(InTableMode);
+            return;
+        }
+        if (node->hasTagName(headTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InBodyMode);
+            return;
+        }
+        if (node->hasTagName(bodyTag)) {
+            setInsertionMode(InBodyMode);
+            return;
+        }
+        if (node->hasTagName(framesetTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InFramesetMode);
+            return;
+        }
+        if (node->hasTagName(htmlTag)) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(BeforeHeadMode);
+            return;
+        }
+        if (reachedRoot) {
+            ASSERT(isParsingFragment());
+            setInsertionMode(InBodyMode);
+            return;
+        }
+    }
+}
+
+// Handles an end tag in InTableBodyMode. </tbody>, </thead> and </tfoot>
+// close the current table section; </table> closes the section through a fake
+// end tag and is then reprocessed; a fixed set of other table-related end
+// tags is ignored with a parse error; everything else uses the in-table rules.
+void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::EndTag);
+    if (isTableBodyContextTag(token.name())) {
+        if (m_tree.openElements()->inTableScope(token.name())) {
+            m_tree.openElements()->popUntilTableBodyScopeMarker();
+            m_tree.openElements()->pop();
+            setInsertionMode(InTableMode);
+            return;
+        }
+        parseError(token);
+        return;
+    }
+    if (token.name() == tableTag) {
+        // FIXME: This is slow.
+        const bool sectionInTableScope = m_tree.openElements()->inTableScope(tbodyTag.localName())
+            || m_tree.openElements()->inTableScope(theadTag.localName())
+            || m_tree.openElements()->inTableScope(tfootTag.localName());
+        if (!sectionInTableScope) {
+            ASSERT(isParsingFragment());
+            parseError(token);
+            return;
+        }
+        // Close the open section with a fake end tag, then reprocess </table>.
+        m_tree.openElements()->popUntilTableBodyScopeMarker();
+        ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
+        processFakeEndTag(m_tree.currentElement()->tagQName());
+        processEndTag(token);
+        return;
+    }
+    const bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag
+        || isTableCellContextTag(token.name())
+        || token.name() == trTag;
+    if (!isIgnoredEndTag) {
+        processEndTagForInTable(token);
+        return;
+    }
+    parseError(token);
+}
+
+// Handles an end tag in InRowMode. </tr> closes the row; </table> and the
+// table-section end tags close the row first and are then reprocessed; a
+// fixed set of other end tags is ignored with a parse error; everything else
+// uses the in-table rules.
+void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::EndTag);
+    if (token.name() == trTag) {
+        processTrEndTagForInRow();
+        return;
+    }
+    if (token.name() == tableTag) {
+        const bool rowWasClosed = processTrEndTagForInRow();
+        if (rowWasClosed) {
+            ASSERT(insertionMode() == InTableBodyMode);
+            processEndTag(token);
+            return;
+        }
+        ASSERT(isParsingFragment());
+        return;
+    }
+    if (isTableBodyContextTag(token.name())) {
+        if (m_tree.openElements()->inTableScope(token.name())) {
+            // Close the row with a fake </tr>, then reprocess the section end tag.
+            processFakeEndTag(trTag);
+            ASSERT(insertionMode() == InTableBodyMode);
+            processEndTag(token);
+            return;
+        }
+        parseError(token);
+        return;
+    }
+    const bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag
+        || isTableCellContextTag(token.name());
+    if (!isIgnoredEndTag) {
+        processEndTagForInTable(token);
+        return;
+    }
+    parseError(token);
+}
+
+// Handles an end tag in InCellMode. </td> and </th> close the cell and switch
+// to InRowMode; </table>, </tr> and the table-section end tags close the cell
+// and are then reprocessed; </body>, </caption>, </col>, </colgroup> and
+// </html> are ignored with a parse error; everything else uses the in-body
+// rules.
+void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::EndTag);
+    if (isTableCellContextTag(token.name())) {
+        if (m_tree.openElements()->inTableScope(token.name())) {
+            m_tree.generateImpliedEndTags();
+            if (!m_tree.currentNode()->hasLocalName(token.name()))
+                parseError(token);
+            m_tree.openElements()->popUntilPopped(token.name());
+            m_tree.activeFormattingElements()->clearToLastMarker();
+            setInsertionMode(InRowMode);
+            return;
+        }
+        parseError(token);
+        return;
+    }
+    const bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag;
+    if (isIgnoredEndTag) {
+        parseError(token);
+        return;
+    }
+    const bool closesTheCell = token.name() == tableTag
+        || token.name() == trTag
+        || isTableBodyContextTag(token.name());
+    if (!closesTheCell) {
+        processEndTagForInBody(token);
+        return;
+    }
+    if (m_tree.openElements()->inTableScope(token.name())) {
+        closeTheCell();
+        processEndTag(token);
+        return;
+    }
+    ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
+    parseError(token);
+}
+
+// Handles an end tag using the in-body rules. Each group of tag names below
+// has its own scope check and implied-end-tag handling; formatting tags are
+// handed to the adoption agency algorithm; any tag without a dedicated rule
+// goes to processAnyOtherEndTagForInBody().
+void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::EndTag);
+    if (token.name() == bodyTag) {
+        processBodyEndTagForInBody(token);
+        return;
+    }
+    if (token.name() == htmlTag) {
+        // </html> acts as </body> followed by reprocessing the token.
+        AtomicHTMLToken endBody(HTMLTokenTypes::EndTag, bodyTag.localName());
+        if (processBodyEndTagForInBody(endBody))
+            processEndTag(token);
+        return;
+    }
+    if (token.name() == addressTag
+        || token.name() == articleTag
+        || token.name() == asideTag
+        || token.name() == blockquoteTag
+        || token.name() == buttonTag
+        || token.name() == centerTag
+        || token.name() == detailsTag
+        || token.name() == dirTag
+        || token.name() == divTag
+        || token.name() == dlTag
+        || token.name() == fieldsetTag
+        || token.name() == figcaptionTag
+        || token.name() == figureTag
+        || token.name() == footerTag
+        || token.name() == headerTag
+        || token.name() == hgroupTag
+        || token.name() == listingTag
+        || token.name() == menuTag
+        || token.name() == navTag
+        || token.name() == olTag
+        || token.name() == preTag
+        || token.name() == sectionTag
+        || token.name() == summaryTag
+        || token.name() == ulTag) {
+        if (!m_tree.openElements()->inScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (!m_tree.currentNode()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (token.name() == formTag) {
+        RefPtr<Element> node = m_tree.takeForm();
+        if (!node || !m_tree.openElements()->inScope(node.get())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (m_tree.currentElement() != node.get())
+            parseError(token);
+        m_tree.openElements()->remove(node.get());
+        // The </form> token is fully handled at this point. Without this
+        // return it would fall through to processAnyOtherEndTagForInBody()
+        // and be processed a second time.
+        return;
+    }
+    if (token.name() == pTag) {
+        if (!m_tree.openElements()->inButtonScope(token.name())) {
+            // A stray </p> creates an empty <p> element and then closes it.
+            parseError(token);
+            processFakeStartTag(pTag);
+            ASSERT(m_tree.openElements()->inScope(token.name()));
+            processEndTag(token);
+            return;
+        }
+        m_tree.generateImpliedEndTagsWithExclusion(token.name());
+        if (!m_tree.currentNode()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (token.name() == liTag) {
+        if (!m_tree.openElements()->inListItemScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTagsWithExclusion(token.name());
+        if (!m_tree.currentNode()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (token.name() == ddTag
+        || token.name() == dtTag) {
+        if (!m_tree.openElements()->inScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTagsWithExclusion(token.name());
+        if (!m_tree.currentNode()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        return;
+    }
+    if (isNumberedHeaderTag(token.name())) {
+        if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (!m_tree.currentNode()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilNumberedHeaderElementPopped();
+        return;
+    }
+    if (isFormattingTag(token.name())) {
+        callTheAdoptionAgency(token);
+        return;
+    }
+    if (token.name() == appletTag
+        || token.name() == marqueeTag
+        || token.name() == objectTag) {
+        if (!m_tree.openElements()->inScope(token.name())) {
+            parseError(token);
+            return;
+        }
+        m_tree.generateImpliedEndTags();
+        if (!m_tree.currentNode()->hasLocalName(token.name()))
+            parseError(token);
+        m_tree.openElements()->popUntilPopped(token.name());
+        m_tree.activeFormattingElements()->clearToLastMarker();
+        return;
+    }
+    if (token.name() == brTag) {
+        // </br> is a parse error that is treated like a <br> start tag.
+        parseError(token);
+        processFakeStartTag(brTag);
+        return;
+    }
+    processAnyOtherEndTagForInBody(token);
+}
+
+// Handles a </caption> end tag for InCaptionMode. When a caption is in table
+// scope, the stack is popped through it, the active formatting elements are
+// cleared to the last marker, the mode becomes InTableMode and true is
+// returned. Otherwise nothing is changed and false is returned.
+bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
+{
+    const bool captionInTableScope = m_tree.openElements()->inTableScope(captionTag.localName());
+    if (captionInTableScope) {
+        m_tree.generateImpliedEndTags();
+        // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
+        m_tree.openElements()->popUntilPopped(captionTag.localName());
+        m_tree.activeFormattingElements()->clearToLastMarker();
+        setInsertionMode(InTableMode);
+        return true;
+    }
+    ASSERT(isParsingFragment());
+    // FIXME: parse error
+    return false;
+}
+
+// Handles a </tr> end tag for InRowMode. When a tr is in table scope, the
+// stack is popped back to the row, the row itself is popped, the mode becomes
+// InTableBodyMode and true is returned. Otherwise nothing is changed and
+// false is returned.
+bool HTMLTreeBuilder::processTrEndTagForInRow()
+{
+    const bool rowInTableScope = m_tree.openElements()->inTableScope(trTag.localName());
+    if (rowInTableScope) {
+        m_tree.openElements()->popUntilTableRowScopeMarker();
+        ASSERT(m_tree.currentElement()->hasTagName(trTag));
+        m_tree.openElements()->pop();
+        setInsertionMode(InTableBodyMode);
+        return true;
+    }
+    ASSERT(isParsingFragment());
+    // FIXME: parse error
+    return false;
+}
+
+// Handles a </table> end tag for InTableMode. When a table is in table scope,
+// the stack is popped through it, the insertion mode is reset from the
+// remaining stack and true is returned. Otherwise nothing is changed and
+// false is returned.
+bool HTMLTreeBuilder::processTableEndTagForInTable()
+{
+    const bool tableInTableScope = m_tree.openElements()->inTableScope(tableTag);
+    if (tableInTableScope) {
+        m_tree.openElements()->popUntilPopped(tableTag.localName());
+        resetInsertionModeAppropriately();
+        return true;
+    }
+    ASSERT(isParsingFragment());
+    // FIXME: parse error.
+    return false;
+}
+
+// Handles an end tag using the in-table rules. </table> closes the table; a
+// fixed set of table-related end tags is ignored with a parse error;
+// everything else is processed with the in-body rules while insertions are
+// redirected to the foster parent.
+void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
+{
+    ASSERT(token.type() == HTMLTokenTypes::EndTag);
+    if (token.name() == tableTag) {
+        processTableEndTagForInTable();
+        return;
+    }
+    const bool isIgnoredEndTag = token.name() == bodyTag
+        || isCaptionColOrColgroupTag(token.name())
+        || token.name() == htmlTag
+        || isTableBodyContextTag(token.name())
+        || isTableCellContextTag(token.name())
+        || token.name() == trTag;
+    if (!isIgnoredEndTag) {
+        // Is this redirection necessary here?
+        HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
+        processEndTagForInBody(token);
+        return;
+    }
+    parseError(token);
+}
+
+void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
+{
+ ASSERT(token.type() == HTMLTokenTypes::EndTag);
+ switch (insertionMode()) {
+ case InitialMode:
+ ASSERT(insertionMode() == InitialMode);
+ defaultForInitial();
+ // Fall through.
+ case BeforeHTMLMode:
+ ASSERT(insertionMode() == BeforeHTMLMode);
+ if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForBeforeHTML();
+ // Fall through.
+ case BeforeHeadMode:
+ ASSERT(insertionMode() == BeforeHeadMode);
+ if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForBeforeHead();
+ // Fall through.
+ case InHeadMode:
+ ASSERT(insertionMode() == InHeadMode);
+ if (token.name() == headTag) {
+ m_tree.openElements()->popHTMLHeadElement();
+ setInsertionMode(AfterHeadMode);
+ return;
+ }
+ if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForInHead();
+ // Fall through.
+ case AfterHeadMode:
+ ASSERT(insertionMode() == AfterHeadMode);
+ if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForAfterHead();
+ // Fall through
+ case InBodyMode:
+ ASSERT(insertionMode() == InBodyMode);
+ processEndTagForInBody(token);
+ break;
+ case InTableMode:
+ ASSERT(insertionMode() == InTableMode);
+ processEndTagForInTable(token);
+ break;
+ case InCaptionMode:
+ ASSERT(insertionMode() == InCaptionMode);
+ if (token.name() == captionTag) {
+ processCaptionEndTagForInCaption();
+ return;
+ }
+ if (token.name() == tableTag) {
+ parseError(token);
+ if (!processCaptionEndTagForInCaption()) {
+ ASSERT(isParsingFragment());
+ return;
+ }
+ processEndTag(token);
+ return;
+ }
+ if (token.name() == bodyTag
+ || token.name() == colTag
+ || token.name() == colgroupTag
+ || token.name() == htmlTag
+ || isTableBodyContextTag(token.name())
+ || isTableCellContextTag(token.name())
+ || token.name() == trTag) {
+ parseError(token);
+ return;
+ }
+ processEndTagForInBody(token);
+ break;
+ case InColumnGroupMode:
+ ASSERT(insertionMode() == InColumnGroupMode);
+ if (token.name() == colgroupTag) {
+ processColgroupEndTagForInColumnGroup();
+ return;
+ }
+ if (token.name() == colTag) {
+ parseError(token);
+ return;
+ }
+ if (!processColgroupEndTagForInColumnGroup()) {
+ ASSERT(isParsingFragment());
+ return;
+ }
+ processEndTag(token);
+ break;
+ case InRowMode:
+ ASSERT(insertionMode() == InRowMode);
+ processEndTagForInRow(token);
+ break;
+ case InCellMode:
+ ASSERT(insertionMode() == InCellMode);
+ processEndTagForInCell(token);
+ break;
+ case InTableBodyMode:
+ ASSERT(insertionMode() == InTableBodyMode);
+ processEndTagForInTableBody(token);
+ break;
+ case AfterBodyMode:
+ ASSERT(insertionMode() == AfterBodyMode);
+ if (token.name() == htmlTag) {
+ if (isParsingFragment()) {
+ parseError(token);
+ return;
+ }
+ setInsertionMode(AfterAfterBodyMode);
+ return;
+ }
+ // Fall through.
+ case AfterAfterBodyMode:
+ ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+ parseError(token);
+ setInsertionMode(InBodyMode);
+ processEndTag(token);
+ break;
+ case InHeadNoscriptMode:
+ ASSERT(insertionMode() == InHeadNoscriptMode);
+ if (token.name() == noscriptTag) {
+ ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
+ m_tree.openElements()->pop();
+ ASSERT(m_tree.currentElement()->hasTagName(headTag));
+ setInsertionMode(InHeadMode);
+ return;
+ }
+ if (token.name() != brTag) {
+ parseError(token);
+ return;
+ }
+ defaultForInHeadNoscript();
+ processToken(token);
+ break;
+ case TextMode:
+ if (token.name() == scriptTag) {
+ // Pause ourselves so that parsing stops until the script can be processed by the caller.
+ m_isPaused = true;
+ ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
+ m_scriptToProcess = m_tree.currentElement();
+ m_scriptToProcessStartPosition = m_lastScriptElementStartPosition;
+ m_tree.openElements()->pop();
+ if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
+ m_scriptToProcess->removeAllChildren();
+ setInsertionMode(m_originalInsertionMode);
+
+ // This token will not have been created by the tokenizer if a
+ // self-closing script tag was encountered and pre-HTML5 parser
+ // quirks are enabled. We must set the tokenizer's state to
+ // DataState explicitly if the tokenizer didn't have a chance to.
+ ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_usePreHTML5ParserQuirks);
+ m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
+ return;
+ }
+ m_tree.openElements()->pop();
+ setInsertionMode(m_originalInsertionMode);
+ break;
+ case InFramesetMode:
+ ASSERT(insertionMode() == InFramesetMode);
+ if (token.name() == framesetTag) {
+ if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
+ parseError(token);
+ return;
+ }
+ m_tree.openElements()->pop();
+ if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
+ setInsertionMode(AfterFramesetMode);
+ return;
+ }
+ break;
+ case AfterFramesetMode:
+ ASSERT(insertionMode() == AfterFramesetMode);
+ if (token.name() == htmlTag) {
+ setInsertionMode(AfterAfterFramesetMode);
+ return;
+ }
+ // Fall through.
+ case AfterAfterFramesetMode:
+ ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+ parseError(token);
+ break;
+ case InSelectInTableMode:
+ ASSERT(insertionMode() == InSelectInTableMode);
+ if (token.name() == captionTag
+ || token.name() == tableTag
+ || isTableBodyContextTag(token.name())
+ || token.name() == trTag
+ || isTableCellContextTag(token.name())) {
+ parseError(token);
+ if (m_tree.openElements()->inTableScope(token.name())) {
+ AtomicHTMLToken endSelect(HTMLTokenTypes::EndTag, selectTag.localName());
+ processEndTag(endSelect);
+ processEndTag(token);
+ }
+ return;
+ }
+ // Fall through.
+ case InSelectMode:
+ ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+ if (token.name() == optgroupTag) {
+ if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
+ processFakeEndTag(optionTag);
+ if (m_tree.currentNode()->hasTagName(optgroupTag)) {
+ m_tree.openElements()->pop();
+ return;
+ }
+ parseError(token);
+ return;
+ }
+ if (token.name() == optionTag) {
+ if (m_tree.currentNode()->hasTagName(optionTag)) {
+ m_tree.openElements()->pop();
+ return;
+ }
+ parseError(token);
+ return;
+ }
+ if (token.name() == selectTag) {
+ if (!m_tree.openElements()->inSelectScope(token.name())) {
+ ASSERT(isParsingFragment());
+ parseError(token);
+ return;
+ }
+ m_tree.openElements()->popUntilPopped(selectTag.localName());
+ resetInsertionModeAppropriately();
+ return;
+ }
+ break;
+ case InTableTextMode:
+ defaultForInTableText();
+ processEndTag(token);
+ break;
+ }
+}
+
+ // Inserts a comment token at the place selected by the current insertion mode:
+ // on the document, on the <html> element, or at the current insertion point.
+ void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLTokenTypes::Comment);
+     switch (m_insertionMode) {
+     case InitialMode:
+     case BeforeHTMLMode:
+     case AfterAfterBodyMode:
+     case AfterAfterFramesetMode:
+         m_tree.insertCommentOnDocument(token);
+         return;
+     case AfterBodyMode:
+         m_tree.insertCommentOnHTMLHtmlElement(token);
+         return;
+     case InTableTextMode:
+         // Leave the table-text mode first, then handle the comment again
+         // under whichever insertion mode is current afterwards.
+         defaultForInTableText();
+         processComment(token);
+         return;
+     default:
+         m_tree.insertComment(token);
+         return;
+     }
+ }
+
+ // Wraps a character token in an ExternalCharacterTokenBuffer and hands it to
+ // processCharacterBuffer(), which does the per-insertion-mode work.
+ void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLTokenTypes::Character);
+     ExternalCharacterTokenBuffer characterBuffer(token);
+     processCharacterBuffer(characterBuffer);
+ }
+
+ // Consumes the characters in |buffer| according to the current insertion mode.
+ //
+ // The early modes (Initial through AfterHead) strip or insert leading
+ // whitespace, return if nothing is left, and otherwise call their
+ // defaultFor*() helper and fall through to the next case; the ASSERT at the
+ // top of each case checks that the helper moved the insertion mode forward.
+ // Modes that must change mode and start over jump back to ReprocessBuffer
+ // with whatever is still in |buffer|.
+ void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
+ {
+ ReprocessBuffer:
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+ // Note that this logic is different than the generic \r\n collapsing
+ // handled in the input stream preprocessor. This logic is here as an
+ // "authoring convenience" so folks can write:
+ //
+ // <pre>
+ // lorem ipsum
+ // lorem ipsum
+ // </pre>
+ //
+ // without getting an extra newline at the start of their <pre> element.
+ if (m_shouldSkipLeadingNewline) {
+ m_shouldSkipLeadingNewline = false;
+ buffer.skipAtMostOneLeadingNewline();
+ if (buffer.isEmpty())
+ return;
+ }
+
+ switch (insertionMode()) {
+ case InitialMode: {
+ ASSERT(insertionMode() == InitialMode);
+ buffer.skipLeadingWhitespace();
+ if (buffer.isEmpty())
+ return;
+ defaultForInitial();
+ // Fall through.
+ }
+ case BeforeHTMLMode: {
+ ASSERT(insertionMode() == BeforeHTMLMode);
+ buffer.skipLeadingWhitespace();
+ if (buffer.isEmpty())
+ return;
+ defaultForBeforeHTML();
+ // Fall through.
+ }
+ case BeforeHeadMode: {
+ ASSERT(insertionMode() == BeforeHeadMode);
+ buffer.skipLeadingWhitespace();
+ if (buffer.isEmpty())
+ return;
+ defaultForBeforeHead();
+ // Fall through.
+ }
+ case InHeadMode: {
+ ASSERT(insertionMode() == InHeadMode);
+ // Unlike the modes above, leading whitespace is kept here: it is
+ // inserted as a text node before the remainder is handled.
+ String leadingWhitespace = buffer.takeLeadingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
+ if (buffer.isEmpty())
+ return;
+ defaultForInHead();
+ // Fall through.
+ }
+ case AfterHeadMode: {
+ ASSERT(insertionMode() == AfterHeadMode);
+ String leadingWhitespace = buffer.takeLeadingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
+ if (buffer.isEmpty())
+ return;
+ defaultForAfterHead();
+ // Fall through.
+ }
+ case InBodyMode:
+ case InCaptionMode:
+ case InCellMode: {
+ ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
+ processCharacterBufferForInBody(buffer);
+ break;
+ }
+ case InTableMode:
+ case InTableBodyMode:
+ case InRowMode: {
+ ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
+ ASSERT(m_pendingTableCharacters.isEmpty());
+ // When the current node is a table, tbody, tfoot, thead or tr element,
+ // remember the current mode and switch to InTableTextMode so the
+ // characters are queued in m_pendingTableCharacters. Otherwise insert
+ // them right away using the in-body rules while the
+ // RedirectToFosterParentGuard is in scope.
+ if (m_tree.currentNode()->isElementNode()
+ && (m_tree.currentElement()->hasTagName(HTMLNames::tableTag)
+ || m_tree.currentElement()->hasTagName(HTMLNames::tbodyTag)
+ || m_tree.currentElement()->hasTagName(HTMLNames::tfootTag)
+ || m_tree.currentElement()->hasTagName(HTMLNames::theadTag)
+ || m_tree.currentElement()->hasTagName(HTMLNames::trTag))) {
+ m_originalInsertionMode = m_insertionMode;
+ setInsertionMode(InTableTextMode);
+ // Note that we fall through to the InTableTextMode case below.
+ } else {
+ HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
+ processCharacterBufferForInBody(buffer);
+ break;
+ }
+ // Fall through.
+ }
+ case InTableTextMode: {
+ // Queue the characters; defaultForInTableText() inserts them later.
+ buffer.giveRemainingTo(m_pendingTableCharacters);
+ break;
+ }
+ case InColumnGroupMode: {
+ ASSERT(insertionMode() == InColumnGroupMode);
+ String leadingWhitespace = buffer.takeLeadingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
+ if (buffer.isEmpty())
+ return;
+ if (!processColgroupEndTagForInColumnGroup()) {
+ ASSERT(isParsingFragment());
+ // The spec tells us to drop these characters on the floor.
+ buffer.skipLeadingNonWhitespace();
+ if (buffer.isEmpty())
+ return;
+ }
+ goto ReprocessBuffer;
+ }
+ case AfterBodyMode:
+ case AfterAfterBodyMode: {
+ ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+ // FIXME: parse error
+ setInsertionMode(InBodyMode);
+ goto ReprocessBuffer;
+ // NOTE(review): this break is unreachable; the goto above always transfers control.
+ break;
+ }
+ case TextMode: {
+ ASSERT(insertionMode() == TextMode);
+ m_tree.insertTextNode(buffer.takeRemaining());
+ break;
+ }
+ case InHeadNoscriptMode: {
+ ASSERT(insertionMode() == InHeadNoscriptMode);
+ String leadingWhitespace = buffer.takeLeadingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
+ if (buffer.isEmpty())
+ return;
+ defaultForInHeadNoscript();
+ goto ReprocessBuffer;
+ // NOTE(review): this break is unreachable; the goto above always transfers control.
+ break;
+ }
+ case InFramesetMode:
+ case AfterFramesetMode: {
+ // NOTE(review): AfterAfterFramesetMode has its own case further down, so
+ // that disjunct of the assertion cannot be the one that holds here.
+ ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+ String leadingWhitespace = buffer.takeRemainingWhitespace();
+ if (!leadingWhitespace.isEmpty())
+ m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
+ // FIXME: We should generate a parse error if we skipped over any
+ // non-whitespace characters.
+ break;
+ }
+ case InSelectInTableMode:
+ case InSelectMode: {
+ ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
+ m_tree.insertTextNode(buffer.takeRemaining());
+ break;
+ }
+ case AfterAfterFramesetMode: {
+ // Same as the frameset cases above, except that the active formatting
+ // elements are reconstructed before the whitespace is inserted.
+ String leadingWhitespace = buffer.takeRemainingWhitespace();
+ if (!leadingWhitespace.isEmpty()) {
+ m_tree.reconstructTheActiveFormattingElements();
+ m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
+ }
+ // FIXME: We should generate a parse error if we skipped over any
+ // non-whitespace characters.
+ break;
+ }
+ }
+ }
+
+ // In-body character handling: reconstructs the active formatting elements,
+ // inserts everything left in |buffer| as text, and clears m_framesetOk once
+ // text that is not purely whitespace/replacement characters has been seen.
+ void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
+ {
+     m_tree.reconstructTheActiveFormattingElements();
+     String text = buffer.takeRemaining();
+     m_tree.insertTextNode(text);
+
+     // Nothing to update if the flag has already been cleared.
+     if (!m_framesetOk)
+         return;
+     if (!isAllWhitespaceOrReplacementCharacters(text))
+         m_framesetOk = false;
+ }
+
+ // Handles the end-of-file token for the current insertion mode.
+ //
+ // The early modes (Initial through AfterHead) run their defaultFor*() helpers
+ // in sequence by falling through, ending in the in-body handling. Modes that
+ // first have to leave a nested state (InHeadNoscript, InColumnGroup,
+ // InTableText, Text) do that and then call processEndOfFile() again, returning
+ // afterwards. Every path that breaks out of the switch ends by popping all
+ // open elements; paths that return from inside the switch do not.
+ void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
+ {
+ ASSERT(token.type() == HTMLTokenTypes::EndOfFile);
+ switch (insertionMode()) {
+ case InitialMode:
+ ASSERT(insertionMode() == InitialMode);
+ defaultForInitial();
+ // Fall through.
+ case BeforeHTMLMode:
+ ASSERT(insertionMode() == BeforeHTMLMode);
+ defaultForBeforeHTML();
+ // Fall through.
+ case BeforeHeadMode:
+ ASSERT(insertionMode() == BeforeHeadMode);
+ defaultForBeforeHead();
+ // Fall through.
+ case InHeadMode:
+ ASSERT(insertionMode() == InHeadMode);
+ defaultForInHead();
+ // Fall through.
+ case AfterHeadMode:
+ ASSERT(insertionMode() == AfterHeadMode);
+ defaultForAfterHead();
+ // Fall through
+ case InBodyMode:
+ case InCellMode:
+ case InCaptionMode:
+ case InRowMode:
+ ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
+ notImplemented(); // Emit parse error based on what elements are still open.
+ break;
+ case AfterBodyMode:
+ case AfterAfterBodyMode:
+ ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
+ break;
+ case InHeadNoscriptMode:
+ ASSERT(insertionMode() == InHeadNoscriptMode);
+ // Leave the noscript state, then handle EOF again in the resulting mode.
+ defaultForInHeadNoscript();
+ processEndOfFile(token);
+ return;
+ case AfterFramesetMode:
+ case AfterAfterFramesetMode:
+ ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
+ break;
+ case InFramesetMode:
+ case InTableMode:
+ case InTableBodyMode:
+ case InSelectInTableMode:
+ case InSelectMode:
+ ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
+ // EOF is only a parse error here when something other than the root
+ // node is still the current node.
+ if (m_tree.currentNode() != m_tree.openElements()->rootNode())
+ parseError(token);
+ break;
+ case InColumnGroupMode:
+ // Both early returns below skip the popAll() at the end of this function.
+ if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
+ ASSERT(isParsingFragment());
+ return; // FIXME: Should we break here instead of returning?
+ }
+ if (!processColgroupEndTagForInColumnGroup()) {
+ ASSERT(isParsingFragment());
+ return; // FIXME: Should we break here instead of returning?
+ }
+ processEndOfFile(token);
+ return;
+ case InTableTextMode:
+ // Insert the queued table characters and restore the saved mode, then
+ // handle EOF again in that mode.
+ defaultForInTableText();
+ processEndOfFile(token);
+ return;
+ case TextMode:
+ parseError(token);
+ if (m_tree.currentNode()->hasTagName(scriptTag))
+ notImplemented(); // mark the script element as "already started".
+ // Pop the element being read as text, restore the mode saved on entry
+ // to TextMode, and handle EOF again there.
+ m_tree.openElements()->pop();
+ ASSERT(m_originalInsertionMode != TextMode);
+ setInsertionMode(m_originalInsertionMode);
+ processEndOfFile(token);
+ return;
+ }
+ ASSERT(m_tree.currentNode());
+ m_tree.openElements()->popAll();
+ }
+
+ // Default action for InitialMode: when not parsing a fragment, put the
+ // document into quirks mode, then move on to BeforeHTMLMode.
+ void HTMLTreeBuilder::defaultForInitial()
+ {
+     notImplemented();
+     const bool hasFragment = m_fragmentContext.fragment();
+     if (!hasFragment)
+         m_document->setCompatibilityMode(Document::QuirksMode);
+     // FIXME: parse error
+     setInsertionMode(BeforeHTMLMode);
+ }
+
+ // Default action for BeforeHTMLMode: insert a synthesized <html> start tag
+ // and move on to BeforeHeadMode.
+ void HTMLTreeBuilder::defaultForBeforeHTML()
+ {
+     AtomicHTMLToken fakeHtmlStartTag(HTMLTokenTypes::StartTag, htmlTag.localName());
+     m_tree.insertHTMLHtmlStartTagBeforeHTML(fakeHtmlStartTag);
+     setInsertionMode(BeforeHeadMode);
+ }
+
+ // Default action for BeforeHeadMode: process a synthesized <head> start tag.
+ void HTMLTreeBuilder::defaultForBeforeHead()
+ {
+     AtomicHTMLToken fakeHeadStartTag(HTMLTokenTypes::StartTag, headTag.localName());
+     processStartTag(fakeHeadStartTag);
+ }
+
+ // Default action for InHeadMode: process a synthesized </head> end tag.
+ void HTMLTreeBuilder::defaultForInHead()
+ {
+     AtomicHTMLToken fakeHeadEndTag(HTMLTokenTypes::EndTag, headTag.localName());
+     processEndTag(fakeHeadEndTag);
+ }
+
+ // Default action for InHeadNoscriptMode: process a synthesized </noscript>
+ // end tag.
+ void HTMLTreeBuilder::defaultForInHeadNoscript()
+ {
+     AtomicHTMLToken fakeNoscriptEndTag(HTMLTokenTypes::EndTag, noscriptTag.localName());
+     processEndTag(fakeNoscriptEndTag);
+ }
+
+ // Default action for AfterHeadMode: process a synthesized <body> start tag,
+ // then force m_framesetOk back to true.
+ void HTMLTreeBuilder::defaultForAfterHead()
+ {
+     AtomicHTMLToken fakeBodyStartTag(HTMLTokenTypes::StartTag, bodyTag.localName());
+     processStartTag(fakeBodyStartTag);
+     // Set after the tag has been processed so the flag ends up true
+     // regardless of what processStartTag() did to it.
+     m_framesetOk = true;
+ }
+
+ // Leaves InTableTextMode: takes the characters queued in
+ // m_pendingTableCharacters, inserts them, and restores the insertion mode
+ // that was saved in m_originalInsertionMode.
+ void HTMLTreeBuilder::defaultForInTableText()
+ {
+     String pendingText = m_pendingTableCharacters.toString();
+     m_pendingTableCharacters.clear();
+
+     if (isAllWhitespace(pendingText)) {
+         // Whitespace-only text is inserted in place.
+         m_tree.insertTextNode(pendingText);
+         setInsertionMode(m_originalInsertionMode);
+         return;
+     }
+
+     // FIXME: parse error
+     // Anything else is inserted while the foster-parent redirect guard is
+     // alive, and it also rules out a later frameset.
+     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
+     m_tree.reconstructTheActiveFormattingElements();
+     m_tree.insertTextNode(pendingText, NotAllWhitespace);
+     m_framesetOk = false;
+     setInsertionMode(m_originalInsertionMode);
+ }
+
+ // Applies the in-head rules to a start tag.
+ // Returns true when the tag was handled here (including the stray <head>
+ // case, which only reports a parse error) and false when the caller has to
+ // deal with it.
+ bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLTokenTypes::StartTag);
+     if (token.name() == htmlTag)
+         m_tree.insertHTMLHtmlStartTagInBody(token);
+     else if (token.name() == baseTag
+         || token.name() == basefontTag
+         || token.name() == bgsoundTag
+         || token.name() == commandTag
+         || token.name() == linkTag
+         || token.name() == metaTag) {
+         m_tree.insertSelfClosingHTMLElement(token);
+         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
+     } else if (token.name() == titleTag)
+         processGenericRCDATAStartTag(token);
+     else if (token.name() == noscriptTag) {
+         // With scripting enabled <noscript> content is read as raw text;
+         // otherwise the element is inserted and parsed in its own mode.
+         if (scriptEnabled(m_document->frame()))
+             processGenericRawTextStartTag(token);
+         else {
+             m_tree.insertHTMLElement(token);
+             setInsertionMode(InHeadNoscriptMode);
+         }
+     } else if (token.name() == noframesTag || token.name() == styleTag)
+         processGenericRawTextStartTag(token);
+     else if (token.name() == scriptTag) {
+         processScriptStartTag(token);
+         // Under the pre-HTML5 quirks a self-closing <script/> is closed
+         // immediately with a synthesized end tag.
+         if (m_usePreHTML5ParserQuirks && token.selfClosing())
+             processFakeEndTag(scriptTag);
+     } else if (token.name() == headTag)
+         parseError(token);
+     else
+         return false;
+     return true;
+ }
+
+ // Inserts the element for |token|, switches the tokenizer to RCDATAState, and
+ // enters TextMode after saving the current insertion mode in
+ // m_originalInsertionMode so it can be restored later.
+ void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
+ {
+ ASSERT(token.type() == HTMLTokenTypes::StartTag);
+ m_tree.insertHTMLElement(token);
+ m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+ m_originalInsertionMode = m_insertionMode;
+ setInsertionMode(TextMode);
+ }
+
+ // Inserts the element for |token|, switches the tokenizer to RAWTEXTState, and
+ // enters TextMode after saving the current insertion mode in
+ // m_originalInsertionMode so it can be restored later.
+ void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
+ {
+ ASSERT(token.type() == HTMLTokenTypes::StartTag);
+ m_tree.insertHTMLElement(token);
+ m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
+ m_originalInsertionMode = m_insertionMode;
+ setInsertionMode(TextMode);
+ }
+
+ // Inserts a <script> element, switches the tokenizer to ScriptDataState,
+ // records where the script started, and enters TextMode. The insertion mode
+ // in effect on entry is saved in m_originalInsertionMode.
+ void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
+ {
+     ASSERT(token.type() == HTMLTokenTypes::StartTag);
+     m_tree.insertScriptElement(token);
+     m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
+     m_originalInsertionMode = m_insertionMode;
+
+     // Remember the parser's text position as the script's start position;
+     // its line is expected to agree with the tokenizer's line number.
+     m_lastScriptElementStartPosition = m_parser->textPosition();
+     ASSERT(m_lastScriptElementStartPosition.m_line == m_parser->tokenizer()->lineNumber());
+
+     setInsertionMode(TextMode);
+ }
+
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
+ // Decides whether |token| is handled by the foreign-content rules (true) or
+ // by the rules of the current insertion mode (false). The decision depends on
+ // the current node and on the token's type and, for start tags, its name.
+ bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken& token)
+ {
+     if (m_tree.isEmpty())
+         return false;
+     ContainerNode* current = m_tree.currentNode();
+     if (isInHTMLNamespace(current))
+         return false;
+
+     switch (token.type()) {
+     case HTMLTokenTypes::StartTag:
+         // MathML text integration point: every start tag except <mglyph>
+         // and <malignmark> goes to the HTML rules.
+         if (HTMLElementStack::isMathMLTextIntegrationPoint(current)
+             && token.name() != MathMLNames::mglyphTag
+             && token.name() != MathMLNames::malignmarkTag)
+             return false;
+         // <svg> directly inside MathML <annotation-xml>.
+         if (current->hasTagName(MathMLNames::annotation_xmlTag) && token.name() == SVGNames::svgTag)
+             return false;
+         if (HTMLElementStack::isHTMLIntegrationPoint(current))
+             return false;
+         return true;
+     case HTMLTokenTypes::Character:
+         // Characters go to the HTML rules at either kind of integration point.
+         if (HTMLElementStack::isMathMLTextIntegrationPoint(current))
+             return false;
+         if (HTMLElementStack::isHTMLIntegrationPoint(current))
+             return false;
+         return true;
+     case HTMLTokenTypes::EndOfFile:
+         return false;
+     default:
+         return true;
+     }
+ }
+
+ // Handles |token| under the foreign-content rules, dispatching on the token
+ // type. Uninitialized and EndOfFile tokens are not expected here (both hit
+ // ASSERT_NOT_REACHED).
+ void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken& token)
+ {
+ switch (token.type()) {
+ case HTMLTokenTypes::Uninitialized:
+ ASSERT_NOT_REACHED();
+ break;
+ case HTMLTokenTypes::DOCTYPE:
+ parseError(token);
+ break;
+ case HTMLTokenTypes::StartTag: {
+ // The start tags listed below (plus <font> carrying a color, face or
+ // size attribute) end foreign-content handling: report a parse error,
+ // pop back to the foreign-content scope marker, and process the tag
+ // with the regular start-tag rules.
+ if (token.name() == bTag
+ || token.name() == bigTag
+ || token.name() == blockquoteTag
+ || token.name() == bodyTag
+ || token.name() == brTag
+ || token.name() == centerTag
+ || token.name() == codeTag
+ || token.name() == ddTag
+ || token.name() == divTag
+ || token.name() == dlTag
+ || token.name() == dtTag
+ || token.name() == emTag
+ || token.name() == embedTag
+ || isNumberedHeaderTag(token.name())
+ || token.name() == headTag
+ || token.name() == hrTag
+ || token.name() == iTag
+ || token.name() == imgTag
+ || token.name() == liTag
+ || token.name() == listingTag
+ || token.name() == menuTag
+ || token.name() == metaTag
+ || token.name() == nobrTag
+ || token.name() == olTag
+ || token.name() == pTag
+ || token.name() == preTag
+ || token.name() == rubyTag
+ || token.name() == sTag
+ || token.name() == smallTag
+ || token.name() == spanTag
+ || token.name() == strongTag
+ || token.name() == strikeTag
+ || token.name() == subTag
+ || token.name() == supTag
+ || token.name() == tableTag
+ || token.name() == ttTag
+ || token.name() == uTag
+ || token.name() == ulTag
+ || token.name() == varTag
+ || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
+ parseError(token);
+ m_tree.openElements()->popUntilForeignContentScopeMarker();
+ processStartTag(token);
+ return;
+ }
+ // Any other start tag becomes a foreign element in the namespace of the
+ // current element, after the namespace-specific name/attribute
+ // adjustments have been applied to the token.
+ const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
+ if (currentNamespace == MathMLNames::mathmlNamespaceURI)
+ adjustMathMLAttributes(token);
+ if (currentNamespace == SVGNames::svgNamespaceURI) {
+ adjustSVGTagNameCase(token);
+ adjustSVGAttributes(token);
+ }
+ adjustForeignAttributes(token);
+ m_tree.insertForeignElement(token, currentNamespace);
+ break;
+ }
+ case HTMLTokenTypes::EndTag: {
+ if (m_tree.currentNode()->namespaceURI() == SVGNames::svgNamespaceURI)
+ adjustSVGTagNameCase(token);
+
+ // </script> closing an SVG script element: pause the tree builder and
+ // record the element in m_scriptToProcess before popping it.
+ if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
+ m_isPaused = true;
+ m_scriptToProcess = m_tree.currentElement();
+ m_tree.openElements()->pop();
+ return;
+ }
+ if (!isInHTMLNamespace(m_tree.currentNode())) {
+ // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
+ HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
+ // It is a parse error if the top record does not match the end tag.
+ if (!nodeRecord->node()->hasLocalName(token.name()))
+ parseError(token);
+ // Walk the records starting at the top: pop through the first one
+ // whose local name matches the end tag, or stop at the first
+ // HTML-namespace element.
+ // NOTE(review): relies on next() staying non-null until an
+ // HTML-namespace element is reached - confirm.
+ while (1) {
+ if (nodeRecord->node()->hasLocalName(token.name())) {
+ m_tree.openElements()->popUntilPopped(nodeRecord->element());
+ return;
+ }
+ nodeRecord = nodeRecord->next();
+
+ if (isInHTMLNamespace(nodeRecord->node()))
+ break;
+ }
+ }
+ // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
+ processEndTag(token);
+ break;
+ }
+ case HTMLTokenTypes::Comment:
+ m_tree.insertComment(token);
+ return;
+ case HTMLTokenTypes::Character: {
+ // Insert the token's characters as text; text that is not purely
+ // whitespace/replacement characters clears m_framesetOk.
+ String characters = String(token.characters().data(), token.characters().size());
+ m_tree.insertTextNode(characters);
+ if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
+ m_framesetOk = false;
+ break;
+ }
+ case HTMLTokenTypes::EndOfFile:
+ ASSERT_NOT_REACHED();
+ break;
+ }
+ }
+
+ // Tells the document that parsing has finished. Does nothing when parsing a
+ // fragment.
+ void HTMLTreeBuilder::finished()
+ {
+     if (!isParsingFragment()) {
+         ASSERT(m_document);
+         // Warning, this may detach the parser. Do not do anything else after this.
+         m_document->finishedParsing();
+     }
+ }
+
+ // Hook called wherever the tree builder detects a parse error. The body is
+ // intentionally empty here: the token is ignored and nothing is reported.
+ void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
+ {
+ }
+
+ // Returns whether scripts can execute in |frame|; false when there is no frame.
+ bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
+ {
+     return frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript);
+ }
+
+ // Returns whether plugins are allowed in |frame|; false when there is no frame.
+ bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
+ {
+     return frame && frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
+ }
+
+}