Remove vendored html5lib.

author: Stéphane Bidoul <stephane.bidoul@gmail.com> 2022-07-14 19:41:29 +0200
committer: Stéphane Bidoul <stephane.bidoul@gmail.com> 2022-07-16 19:37:11 +0200
commit: d3a318fe59484bcf6affbb0aa1833405aaa24a28 (patch)
tree: 935c7a5d785540010d0b187dd43d76c9eb9a0377
parent: bb2a3d741006c180e3878d0843e0b0ebf3521f52 (diff)
download: pip-d3a318fe59484bcf6affbb0aa1833405aaa24a28.tar.gz
39 files changed, 1 insertions, 13269 deletions
diff --git a/news/html5lib.vendor.rst b/news/html5lib.vendor.rst
new file mode 100644
index 000000000..ca1472e9b
--- /dev/null
+++ b/news/html5lib.vendor.rst
@@ -0,0 +1 @@
+Remove vendored html5lib.
diff --git a/src/pip/_vendor/README.rst b/src/pip/_vendor/README.rst
index 26904ca25..077f1abf7 100644
--- a/src/pip/_vendor/README.rst
+++ b/src/pip/_vendor/README.rst
@@ -104,9 +104,6 @@ Modifications
   rather than ``appdirs``.
 * ``packaging`` has been modified to import its dependencies from
   ``pip._vendor``.
-* ``html5lib`` has been modified to import six from ``pip._vendor``, to prefer
-  importing from ``collections.abc`` instead of ``collections`` and does not
-  import ``xml.etree.cElementTree`` on Python 3.
 * ``CacheControl`` has been modified to import its dependencies from
   ``pip._vendor``.
 * ``requests`` has been modified to import its other dependencies from
diff --git a/src/pip/_vendor/__init__.py b/src/pip/_vendor/__init__.py
index 39579aa96..b22f7abb9 100644
--- a/src/pip/_vendor/__init__.py
+++ b/src/pip/_vendor/__init__.py
@@ -63,7 +63,6 @@ if DEBUNDLED:
     vendored("colorama")
     vendored("distlib")
     vendored("distro")
-    vendored("html5lib")
     vendored("six")
     vendored("six.moves")
     vendored("six.moves.urllib")
diff --git a/src/pip/_vendor/html5lib.pyi b/src/pip/_vendor/html5lib.pyi
deleted file mode 100644
index 9bc9af95e..000000000
--- a/src/pip/_vendor/html5lib.pyi
+++ /dev/null
@@ -1 +0,0 @@
-from html5lib import *
-\ No newline at end of file
diff --git a/src/pip/_vendor/html5lib/LICENSE b/src/pip/_vendor/html5lib/LICENSE
deleted file mode 100644
index c87fa7a00..000000000
--- a/src/pip/_vendor/html5lib/LICENSE
+++ /dev/null
@@ -1,20 +0,0 @@
-Copyright (c) 2006-2013 James Graham and other contributors
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/src/pip/_vendor/html5lib/__init__.py b/src/pip/_vendor/html5lib/__init__.py
deleted file mode 100644
index d1d82f157..000000000
--- a/src/pip/_vendor/html5lib/__init__.py
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-HTML parsing library based on the `WHATWG HTML specification
-<https://whatwg.org/html>`_. The parser is designed to be compatible with
-existing HTML found in the wild and implements well-defined error recovery that
-is largely compatible with modern desktop web browsers.
-
-Example usage::
-
-    from pip._vendor import html5lib
-    with open("my_document.html", "rb") as f:
-        tree = html5lib.parse(f)
-
-For convenience, this module re-exports the following names:
-
-* :func:`~.html5parser.parse`
-* :func:`~.html5parser.parseFragment`
-* :class:`~.html5parser.HTMLParser`
-* :func:`~.treebuilders.getTreeBuilder`
-* :func:`~.treewalkers.getTreeWalker`
-* :func:`~.serializer.serialize`
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-from .html5parser import HTMLParser, parse, parseFragment
-from .treebuilders import getTreeBuilder
-from .treewalkers import getTreeWalker
-from .serializer import serialize
-
-__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
-           "getTreeWalker", "serialize"]
-
-# this has to be at the top level, see how setup.py parses this
-#: Distribution version number.
-__version__ = "1.1"
diff --git a/src/pip/_vendor/html5lib/_ihatexml.py b/src/pip/_vendor/html5lib/_ihatexml.py
deleted file mode 100644
index 3ff803c19..000000000
--- a/src/pip/_vendor/html5lib/_ihatexml.py
+++ /dev/null
@@ -1,289 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-import warnings
-
-from .constants import DataLossWarning
-
-baseChar = """
-[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
-[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
-[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
-[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
-[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
-[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
-[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
-[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
-[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
-[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
-[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
-[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
-[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
-[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
-[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
-[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
-[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
-[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
-[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
-[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
-[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
-[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
-[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
-[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
-[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
-[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
-[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
-[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
-[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
-[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
-#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
-#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
-#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
-[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
-[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
-#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
-[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
-[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
-[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
-[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
-[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
-#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
-[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
-[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
-[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
-[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
-
-ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
-
-combiningCharacter = """
-[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
-[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
-[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
-[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
-#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
-[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
-[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
-#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
-[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
-[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
-#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
-[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
-[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
-[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
-[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
-[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
-#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
-[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
-#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
-[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
-[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
-#x3099 | #x309A"""
-
-digit = """
-[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
-[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
-[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
-[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
-
-extender = """
-#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
-#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
-
-letter = " | ".join([baseChar, ideographic])
-
-# Without the
-name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
-                   extender])
-nameFirst = " | ".join([letter, "_"])
-
-reChar = re.compile(r"#x([\d|A-F]{4,4})")
-reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
-
-
-def charStringToList(chars):
-    charRanges = [item.strip() for item in chars.split(" | ")]
-    rv = []
-    for item in charRanges:
-        foundMatch = False
-        for regexp in (reChar, reCharRange):
-            match = regexp.match(item)
-            if match is not None:
-                rv.append([hexToInt(item) for item in match.groups()])
-                if len(rv[-1]) == 1:
-                    rv[-1] = rv[-1] * 2
-                foundMatch = True
-                break
-        if not foundMatch:
-            assert len(item) == 1
-
-            rv.append([ord(item)] * 2)
-    rv = normaliseCharList(rv)
-    return rv
-
-
-def normaliseCharList(charList):
-    charList = sorted(charList)
-    for item in charList:
-        assert item[1] >= item[0]
-    rv = []
-    i = 0
-    while i < len(charList):
-        j = 1
-        rv.append(charList[i])
-        while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
-            rv[-1][1] = charList[i + j][1]
-            j += 1
-        i += j
-    return rv
-
-
-# We don't really support characters above the BMP :(
-max_unicode = int("FFFF", 16)
-
-
-def missingRanges(charList):
-    rv = []
-    if charList[0] != 0:
-        rv.append([0, charList[0][0] - 1])
-    for i, item in enumerate(charList[:-1]):
-        rv.append([item[1] + 1, charList[i + 1][0] - 1])
-    if charList[-1][1] != max_unicode:
-        rv.append([charList[-1][1] + 1, max_unicode])
-    return rv
-
-
-def listToRegexpStr(charList):
-    rv = []
-    for item in charList:
-        if item[0] == item[1]:
-            rv.append(escapeRegexp(chr(item[0])))
-        else:
-            rv.append(escapeRegexp(chr(item[0])) + "-" +
-                      escapeRegexp(chr(item[1])))
-    return "[%s]" % "".join(rv)
-
-
-def hexToInt(hex_str):
-    return int(hex_str, 16)
-
-
-def escapeRegexp(string):
-    specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
-                         "[", "]", "|", "(", ")", "-")
-    for char in specialCharacters:
-        string = string.replace(char, "\\" + char)
-
-    return string
-
-# output from the above
-nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')  # noqa
-
-nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')  # noqa
-
-# Simpler things
-nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
-
-
-class InfosetFilter(object):
-    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
-
-    def __init__(self,
-                 dropXmlnsLocalName=False,
-                 dropXmlnsAttrNs=False,
-                 preventDoubleDashComments=False,
-                 preventDashAtCommentEnd=False,
-                 replaceFormFeedCharacters=True,
-                 preventSingleQuotePubid=False):
-
-        self.dropXmlnsLocalName = dropXmlnsLocalName
-        self.dropXmlnsAttrNs = dropXmlnsAttrNs
-
-        self.preventDoubleDashComments = preventDoubleDashComments
-        self.preventDashAtCommentEnd = preventDashAtCommentEnd
-
-        self.replaceFormFeedCharacters = replaceFormFeedCharacters
-
-        self.preventSingleQuotePubid = preventSingleQuotePubid
-
-        self.replaceCache = {}
-
-    def coerceAttribute(self, name, namespace=None):
-        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
-            warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
-            return None
-        elif (self.dropXmlnsAttrNs and
-              namespace == "http://www.w3.org/2000/xmlns/"):
-            warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
-            return None
-        else:
-            return self.toXmlName(name)
-
-    def coerceElement(self, name):
-        return self.toXmlName(name)
-
-    def coerceComment(self, data):
-        if self.preventDoubleDashComments:
-            while "--" in data:
-                warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
-                data = data.replace("--", "- -")
-            if data.endswith("-"):
-                warnings.warn("Comments cannot end in a dash", DataLossWarning)
-                data += " "
-        return data
-
-    def coerceCharacters(self, data):
-        if self.replaceFormFeedCharacters:
-            for _ in range(data.count("\x0C")):
-                warnings.warn("Text cannot contain U+000C", DataLossWarning)
-            data = data.replace("\x0C", " ")
-        # Other non-xml characters
-        return data
-
-    def coercePubid(self, data):
-        dataOutput = data
-        for char in nonPubidCharRegexp.findall(data):
-            warnings.warn("Coercing non-XML pubid", DataLossWarning)
-            replacement = self.getReplacementCharacter(char)
-            dataOutput = dataOutput.replace(char, replacement)
-        if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
-            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
-            dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
-        return dataOutput
-
-    def toXmlName(self, name):
-        nameFirst = name[0]
-        nameRest = name[1:]
-        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
-        if m:
-            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
-            nameFirstOutput = self.getReplacementCharacter(nameFirst)
-        else:
-            nameFirstOutput = nameFirst
-
-        nameRestOutput = nameRest
-        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
-        for char in replaceChars:
-            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
-            replacement = self.getReplacementCharacter(char)
-            nameRestOutput = nameRestOutput.replace(char, replacement)
-        return nameFirstOutput + nameRestOutput
-
-    def getReplacementCharacter(self, char):
-        if char in self.replaceCache:
-            replacement = self.replaceCache[char]
-        else:
-            replacement = self.escapeChar(char)
-        return replacement
-
-    def fromXmlName(self, name):
-        for item in set(self.replacementRegexp.findall(name)):
-            name = name.replace(item, self.unescapeChar(item))
-        return name
-
-    def escapeChar(self, char):
-        replacement = "U%05X" % ord(char)
-        self.replaceCache[char] = replacement
-        return replacement
-
-    def unescapeChar(self, charcode):
-        return chr(int(charcode[1:], 16))
diff --git a/src/pip/_vendor/html5lib/_inputstream.py b/src/pip/_vendor/html5lib/_inputstream.py
deleted file mode 100644
index e0bb37602..000000000
--- a/src/pip/_vendor/html5lib/_inputstream.py
+++ /dev/null
@@ -1,918 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from pip._vendor.six import text_type
-from pip._vendor.six.moves import http_client, urllib
-
-import codecs
-import re
-from io import BytesIO, StringIO
-
-from pip._vendor import webencodings
-
-from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
-from .constants import _ReparseException
-from . import _utils
-
-# Non-unicode versions of constants for use in the pre-parser
-spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
-asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
-asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
-spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
-
-
-invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"  # noqa
-
-if _utils.supports_lone_surrogates:
-    # Use one extra step of indirection and create surrogates with
-    # eval. Not using this indirection would introduce an illegal
-    # unicode literal on platforms not supporting such lone
-    # surrogates.
-    assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
-    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
-                                    eval('"\\uD800-\\uDFFF"') +  # pylint:disable=eval-used
-                                    "]")
-else:
-    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
-
-non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
-                              0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
-                              0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
-                              0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
-                              0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
-                              0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
-                              0x10FFFE, 0x10FFFF}
-
-ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
-
-# Cache for charsUntil()
-charsUntilRegEx = {}
-
-
-class BufferedStream(object):
-    """Buffering for streams that do not have buffering of their own
-
-    The buffer is implemented as a list of chunks on the assumption that
-    joining many strings will be slow since it is O(n**2)
-    """
-
-    def __init__(self, stream):
-        self.stream = stream
-        self.buffer = []
-        self.position = [-1, 0]  # chunk number, offset
-
-    def tell(self):
-        pos = 0
-        for chunk in self.buffer[:self.position[0]]:
-            pos += len(chunk)
-        pos += self.position[1]
-        return pos
-
-    def seek(self, pos):
-        assert pos <= self._bufferedBytes()
-        offset = pos
-        i = 0
-        while len(self.buffer[i]) < offset:
-            offset -= len(self.buffer[i])
-            i += 1
-        self.position = [i, offset]
-
-    def read(self, bytes):
-        if not self.buffer:
-            return self._readStream(bytes)
-        elif (self.position[0] == len(self.buffer) and
-              self.position[1] == len(self.buffer[-1])):
-            return self._readStream(bytes)
-        else:
-            return self._readFromBuffer(bytes)
-
-    def _bufferedBytes(self):
-        return sum([len(item) for item in self.buffer])
-
-    def _readStream(self, bytes):
-        data = self.stream.read(bytes)
-        self.buffer.append(data)
-        self.position[0] += 1
-        self.position[1] = len(data)
-        return data
-
-    def _readFromBuffer(self, bytes):
-        remainingBytes = bytes
-        rv = []
-        bufferIndex = self.position[0]
-        bufferOffset = self.position[1]
-        while bufferIndex < len(self.buffer) and remainingBytes != 0:
-            assert remainingBytes > 0
-            bufferedData = self.buffer[bufferIndex]
-
-            if remainingBytes <= len(bufferedData) - bufferOffset:
-                bytesToRead = remainingBytes
-                self.position = [bufferIndex, bufferOffset + bytesToRead]
-            else:
-                bytesToRead = len(bufferedData) - bufferOffset
-                self.position = [bufferIndex, len(bufferedData)]
-                bufferIndex += 1
-            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
-            remainingBytes -= bytesToRead
-
-            bufferOffset = 0
-
-        if remainingBytes:
-            rv.append(self._readStream(remainingBytes))
-
-        return b"".join(rv)
-
-
-def HTMLInputStream(source, **kwargs):
-    # Work around Python bug #20007: read(0) closes the connection.
-    # http://bugs.python.org/issue20007
-    if (isinstance(source, http_client.HTTPResponse) or
-        # Also check for addinfourl wrapping HTTPResponse
-        (isinstance(source, urllib.response.addbase) and
-         isinstance(source.fp, http_client.HTTPResponse))):
-        isUnicode = False
-    elif hasattr(source, "read"):
-        isUnicode = isinstance(source.read(0), text_type)
-    else:
-        isUnicode = isinstance(source, text_type)
-
-    if isUnicode:
-        encodings = [x for x in kwargs if x.endswith("_encoding")]
-        if encodings:
-            raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
-
-        return HTMLUnicodeInputStream(source, **kwargs)
-    else:
-        return HTMLBinaryInputStream(source, **kwargs)
-
-
-class HTMLUnicodeInputStream(object):
-    """Provides a unicode stream of characters to the HTMLTokenizer.
-
-    This class takes care of character encoding and removing or replacing
-    incorrect byte-sequences and also provides column and line tracking.
-
-    """
-
-    _defaultChunkSize = 10240
-
-    def __init__(self, source):
-        """Initialises the HTMLInputStream.
-
-        HTMLInputStream(source, [encoding]) -> Normalized stream from source
-        for use by html5lib.
-
-        source can be either a file-object, local filename or a string.
-
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
-
-        """
-
-        if not _utils.supports_lone_surrogates:
-            # Such platforms will have already checked for such
-            # surrogate errors, so no need to do this checking.
-            self.reportCharacterErrors = None
-        elif len("\U0010FFFF") == 1:
-            self.reportCharacterErrors = self.characterErrorsUCS4
-        else:
-            self.reportCharacterErrors = self.characterErrorsUCS2
-
-        # List of where new lines occur
-        self.newLines = [0]
-
-        self.charEncoding = (lookupEncoding("utf-8"), "certain")
-        self.dataStream = self.openStream(source)
-
-        self.reset()
-
-    def reset(self):
-        self.chunk = ""
-        self.chunkSize = 0
-        self.chunkOffset = 0
-        self.errors = []
-
-        # number of (complete) lines in previous chunks
-        self.prevNumLines = 0
-        # number of columns in the last line of the previous chunk
-        self.prevNumCols = 0
-
-        # Deal with CR LF and surrogates split over chunk boundaries
-        self._bufferedCharacter = None
-
-    def openStream(self, source):
-        """Produces a file object from source.
-
-        source can be either a file object, local filename or a string.
-
-        """
-        # Already a file object
-        if hasattr(source, 'read'):
-            stream = source
-        else:
-            stream = StringIO(source)
-
-        return stream
-
-    def _position(self, offset):
-        chunk = self.chunk
-        nLines = chunk.count('\n', 0, offset)
-        positionLine = self.prevNumLines + nLines
-        lastLinePos = chunk.rfind('\n', 0, offset)
-        if lastLinePos == -1:
-            positionColumn = self.prevNumCols + offset
-        else:
-            positionColumn = offset - (lastLinePos + 1)
-        return (positionLine, positionColumn)
-
-    def position(self):
-        """Returns (line, col) of the current position in the stream."""
-        line, col = self._position(self.chunkOffset)
-        return (line + 1, col)
-
-    def char(self):
-        """ Read one character from the stream or queue if available. Return
-            EOF when EOF is reached.
-        """
-        # Read a new chunk from the input stream if necessary
-        if self.chunkOffset >= self.chunkSize:
-            if not self.readChunk():
-                return EOF
-
-        chunkOffset = self.chunkOffset
-        char = self.chunk[chunkOffset]
-        self.chunkOffset = chunkOffset + 1
-
-        return char
-
-    def readChunk(self, chunkSize=None):
-        if chunkSize is None:
-            chunkSize = self._defaultChunkSize
-
-        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
-
-        self.chunk = ""
-        self.chunkSize = 0
-        self.chunkOffset = 0
-
-        data = self.dataStream.read(chunkSize)
-
-        # Deal with CR LF and surrogates broken across chunks
-        if self._bufferedCharacter:
-            data = self._bufferedCharacter + data
-            self._bufferedCharacter = None
-        elif not data:
-            # We have no more data, bye-bye stream
-            return False
-
-        if len(data) > 1:
-            lastv = ord(data[-1])
-            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
-                self._bufferedCharacter = data[-1]
-                data = data[:-1]
-
-        if self.reportCharacterErrors:
-            self.reportCharacterErrors(data)
-
-        # Replace invalid characters
-        data = data.replace("\r\n", "\n")
-        data = data.replace("\r", "\n")
-
-        self.chunk = data
-        self.chunkSize = len(data)
-
-        return True
-
-    def characterErrorsUCS4(self, data):
-        for _ in range(len(invalid_unicode_re.findall(data))):
-            self.errors.append("invalid-codepoint")
-
-    def characterErrorsUCS2(self, data):
-        # Someone picked the wrong compile option
-        # You lose
-        skip = False
-        for match in invalid_unicode_re.finditer(data):
-            if skip:
-                continue
-            codepoint = ord(match.group())
-            pos = match.start()
-            # Pretty sure there should be endianness issues here
-            if _utils.isSurrogatePair(data[pos:pos + 2]):
-                # We have a surrogate pair!
-                char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
-                if char_val in non_bmp_invalid_codepoints:
-                    self.errors.append("invalid-codepoint")
-                skip = True
-            elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
-                  pos == len(data) - 1):
-                self.errors.append("invalid-codepoint")
-            else:
-                skip = False
-                self.errors.append("invalid-codepoint")
-
-    def charsUntil(self, characters, opposite=False):
-        """ Returns a string of characters from the stream up to but not
-        including any character in 'characters' or EOF. 'characters' must be
-        a container that supports the 'in' method and iteration over its
-        characters.
-        """
-
-        # Use a cache of regexps to find the required characters
-        try:
-            chars = charsUntilRegEx[(characters, opposite)]
-        except KeyError:
-            if __debug__:
-                for c in characters:
-                    assert(ord(c) < 128)
-            regex = "".join(["\\x%02x" % ord(c) for c in characters])
-            if not opposite:
-                regex = "^%s" % regex
-            chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
-
-        rv = []
-
-        while True:
-            # Find the longest matching prefix
-            m = chars.match(self.chunk, self.chunkOffset)
-            if m is None:
-                # If nothing matched, and it wasn't because we ran out of chunk,
-                # then stop
-                if self.chunkOffset != self.chunkSize:
-                    break
-            else:
-                end = m.end()
-                # If not the whole chunk matched, return everything
-                # up to the part that didn't match
-                if end != self.chunkSize:
-                    rv.append(self.chunk[self.chunkOffset:end])
-                    self.chunkOffset = end
-                    break
-            # If the whole remainder of the chunk matched,
-            # use it all and read the next chunk
-            rv.append(self.chunk[self.chunkOffset:])
-            if not self.readChunk():
-                # Reached EOF
-                break
-
-        r = "".join(rv)
-        return r
-
-    def unget(self, char):
-        # Only one character is allowed to be ungotten at once - it must
-        # be consumed again before any further call to unget
-        if char is not EOF:
-            if self.chunkOffset == 0:
-                # unget is called quite rarely, so it's a good idea to do
-                # more work here if it saves a bit of work in the frequently
-                # called char and charsUntil.
-                # So, just prepend the ungotten character onto the current
-                # chunk:
-                self.chunk = char + self.chunk
-                self.chunkSize += 1
-            else:
-                self.chunkOffset -= 1
-                assert self.chunk[self.chunkOffset] == char
-
-
-class HTMLBinaryInputStream(HTMLUnicodeInputStream):
-    """Provides a unicode stream of characters to the HTMLTokenizer.
-
-    This class takes care of character encoding and removing or replacing
-    incorrect byte-sequences and also provides column and line tracking.
-
-    """
-
-    def __init__(self, source, override_encoding=None, transport_encoding=None,
-                 same_origin_parent_encoding=None, likely_encoding=None,
-                 default_encoding="windows-1252", useChardet=True):
-        """Initialises the HTMLInputStream.
-
-        HTMLInputStream(source, [encoding]) -> Normalized stream from source
-        for use by html5lib.
-
-        source can be either a file-object, local filename or a string.
-
-        The optional encoding parameter must be a string that indicates
-        the encoding.  If specified, that encoding will be used,
-        regardless of any BOM or later declaration (such as in a meta
-        element)
-
-        """
-        # Raw Stream - for unicode objects this will encode to utf-8 and set
-        #              self.charEncoding as appropriate
-        self.rawStream = self.openStream(source)
-
-        HTMLUnicodeInputStream.__init__(self, self.rawStream)
-
-        # Encoding Information
-        # Number of bytes to use when looking for a meta element with
-        # encoding information
-        self.numBytesMeta = 1024
-        # Number of bytes to use when using detecting encoding using chardet
-        self.numBytesChardet = 100
-        # Things from args
-        self.override_encoding = override_encoding
-        self.transport_encoding = transport_encoding
-        self.same_origin_parent_encoding = same_origin_parent_encoding
-        self.likely_encoding = likely_encoding
-        self.default_encoding = default_encoding
-
-        # Determine encoding
-        self.charEncoding = self.determineEncoding(useChardet)
-        assert self.charEncoding[0] is not None
-
-        # Call superclass
-        self.reset()
-
-    def reset(self):
-        self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
-        HTMLUnicodeInputStream.reset(self)
-
-    def openStream(self, source):
-        """Produces a file object from source.
-
-        source can be either a file object, local filename or a string.
-
-        """
-        # Already a file object
-        if hasattr(source, 'read'):
-            stream = source
-        else:
-            stream = BytesIO(source)
-
-        try:
-            stream.seek(stream.tell())
-        except Exception:
-            stream = BufferedStream(stream)
-
-        return stream
-
-    def determineEncoding(self, chardet=True):
-        # BOMs take precedence over everything
-        # This will also read past the BOM if present
-        charEncoding = self.detectBOM(), "certain"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # If we've been overridden, we've been overridden
-        charEncoding = lookupEncoding(self.override_encoding), "certain"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Now check the transport layer
-        charEncoding = lookupEncoding(self.transport_encoding), "certain"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Look for meta elements with encoding information
-        charEncoding = self.detectEncodingMeta(), "tentative"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Parent document encoding
-        charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
-        if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
-            return charEncoding
-
-        # "likely" encoding
-        charEncoding = lookupEncoding(self.likely_encoding), "tentative"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Guess with chardet, if available
-        if chardet:
-            try:
-                from pip._vendor.chardet.universaldetector import UniversalDetector
-            except ImportError:
-                pass
-            else:
-                buffers = []
-                detector = UniversalDetector()
-                while not detector.done:
-                    buffer = self.rawStream.read(self.numBytesChardet)
-                    assert isinstance(buffer, bytes)
-                    if not buffer:
-                        break
-                    buffers.append(buffer)
-                    detector.feed(buffer)
-                detector.close()
-                encoding = lookupEncoding(detector.result['encoding'])
-                self.rawStream.seek(0)
-                if encoding is not None:
-                    return encoding, "tentative"
-
-        # Try the default encoding
-        charEncoding = lookupEncoding(self.default_encoding), "tentative"
-        if charEncoding[0] is not None:
-            return charEncoding
-
-        # Fallback to html5lib's default if even that hasn't worked
-        return lookupEncoding("windows-1252"), "tentative"
-
-    def changeEncoding(self, newEncoding):
-        assert self.charEncoding[1] != "certain"
-        newEncoding = lookupEncoding(newEncoding)
-        if newEncoding is None:
-            return
-        if newEncoding.name in ("utf-16be", "utf-16le"):
-            newEncoding = lookupEncoding("utf-8")
-            assert newEncoding is not None
-        elif newEncoding == self.charEncoding[0]:
-            self.charEncoding = (self.charEncoding[0], "certain")
-        else:
-            self.rawStream.seek(0)
-            self.charEncoding = (newEncoding, "certain")
-            self.reset()
-            raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
-
-    def detectBOM(self):
-        """Attempts to detect at BOM at the start of the stream. If
-        an encoding can be determined from the BOM return the name of the
-        encoding otherwise return None"""
-        bomDict = {
-            codecs.BOM_UTF8: 'utf-8',
-            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
-            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
-        }
-
-        # Go to beginning of file and read in 4 bytes
-        string = self.rawStream.read(4)
-        assert isinstance(string, bytes)
-
-        # Try detecting the BOM using bytes from the string
-        encoding = bomDict.get(string[:3])         # UTF-8
-        seek = 3
-        if not encoding:
-            # Need to detect UTF-32 before UTF-16
-            encoding = bomDict.get(string)         # UTF-32
-            seek = 4
-            if not encoding:
-                encoding = bomDict.get(string[:2])  # UTF-16
-                seek = 2
-
-        # Set the read position past the BOM if one was found, otherwise
-        # set it to the start of the stream
-        if encoding:
-            self.rawStream.seek(seek)
-            return lookupEncoding(encoding)
-        else:
-            self.rawStream.seek(0)
-            return None
-
-    def detectEncodingMeta(self):
-        """Report the encoding declared by the meta element
-        """
-        buffer = self.rawStream.read(self.numBytesMeta)
-        assert isinstance(buffer, bytes)
-        parser = EncodingParser(buffer)
-        self.rawStream.seek(0)
-        encoding = parser.getEncoding()
-
-        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
-            encoding = lookupEncoding("utf-8")
-
-        return encoding
-
-
-class EncodingBytes(bytes):
-    """String-like object with an associated position and various extra methods
-    If the position is ever greater than the string length then an exception is
-    raised"""
-    def __new__(self, value):
-        assert isinstance(value, bytes)
-        return bytes.__new__(self, value.lower())
-
-    def __init__(self, value):
-        # pylint:disable=unused-argument
-        self._position = -1
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        p = self._position = self._position + 1
-        if p >= len(self):
-            raise StopIteration
-        elif p < 0:
-            raise TypeError
-        return self[p:p + 1]
-
-    def next(self):
-        # Py2 compat
-        return self.__next__()
-
-    def previous(self):
-        p = self._position
-        if p >= len(self):
-            raise StopIteration
-        elif p < 0:
-            raise TypeError
-        self._position = p = p - 1
-        return self[p:p + 1]
-
-    def setPosition(self, position):
-        if self._position >= len(self):
-            raise StopIteration
-        self._position = position
-
-    def getPosition(self):
-        if self._position >= len(self):
-            raise StopIteration
-        if self._position >= 0:
-            return self._position
-        else:
-            return None
-
-    position = property(getPosition, setPosition)
-
-    def getCurrentByte(self):
-        return self[self.position:self.position + 1]
-
-    currentByte = property(getCurrentByte)
-
-    def skip(self, chars=spaceCharactersBytes):
-        """Skip past a list of characters"""
-        p = self.position               # use property for the error-checking
-        while p < len(self):
-            c = self[p:p + 1]
-            if c not in chars:
-                self._position = p
-                return c
-            p += 1
-        self._position = p
-        return None
-
-    def skipUntil(self, chars):
-        p = self.position
-        while p < len(self):
-            c = self[p:p + 1]
-            if c in chars:
-                self._position = p
-                return c
-            p += 1
-        self._position = p
-        return None
-
-    def matchBytes(self, bytes):
-        """Look for a sequence of bytes at the start of a string. If the bytes
-        are found return True and advance the position to the byte after the
-        match. Otherwise return False and leave the position alone"""
-        rv = self.startswith(bytes, self.position)
-        if rv:
-            self.position += len(bytes)
-        return rv
-
-    def jumpTo(self, bytes):
-        """Look for the next sequence of bytes matching a given sequence. If
-        a match is found advance the position to the last byte of the match"""
-        try:
-            self._position = self.index(bytes, self.position) + len(bytes) - 1
-        except ValueError:
-            raise StopIteration
-        return True
-
-
-class EncodingParser(object):
-    """Mini parser for detecting character encoding from meta elements"""
-
-    def __init__(self, data):
-        """string - the data to work on for encoding detection"""
-        self.data = EncodingBytes(data)
-        self.encoding = None
-
-    def getEncoding(self):
-        if b"<meta" not in self.data:
-            return None
-
-        methodDispatch = (
-            (b"<!--", self.handleComment),
-            (b"<meta", self.handleMeta),
-            (b"</", self.handlePossibleEndTag),
-            (b"<!", self.handleOther),
-            (b"<?", self.handleOther),
-            (b"<", self.handlePossibleStartTag))
-        for _ in self.data:
-            keepParsing = True
-            try:
-                self.data.jumpTo(b"<")
-            except StopIteration:
-                break
-            for key, method in methodDispatch:
-                if self.data.matchBytes(key):
-                    try:
-                        keepParsing = method()
-                        break
-                    except StopIteration:
-                        keepParsing = False
-                        break
-            if not keepParsing:
-                break
-
-        return self.encoding
-
-    def handleComment(self):
-        """Skip over comments"""
-        return self.data.jumpTo(b"-->")
-
-    def handleMeta(self):
-        if self.data.currentByte not in spaceCharactersBytes:
-            # if we have <meta not followed by a space so just keep going
-            return True
-        # We have a valid meta element we want to search for attributes
-        hasPragma = False
-        pendingEncoding = None
-        while True:
-            # Try to find the next attribute after the current position
-            attr = self.getAttribute()
-            if attr is None:
-                return True
-            else:
-                if attr[0] == b"http-equiv":
-                    hasPragma = attr[1] == b"content-type"
-                    if hasPragma and pendingEncoding is not None:
-                        self.encoding = pendingEncoding
-                        return False
-                elif attr[0] == b"charset":
-                    tentativeEncoding = attr[1]
-                    codec = lookupEncoding(tentativeEncoding)
-                    if codec is not None:
-                        self.encoding = codec
-                        return False
-                elif attr[0] == b"content":
-                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
-                    tentativeEncoding = contentParser.parse()
-                    if tentativeEncoding is not None:
-                        codec = lookupEncoding(tentativeEncoding)
-                        if codec is not None:
-                            if hasPragma:
-                                self.encoding = codec
-                                return False
-                            else:
-                                pendingEncoding = codec
-
-    def handlePossibleStartTag(self):
-        return self.handlePossibleTag(False)
-
-    def handlePossibleEndTag(self):
-        next(self.data)
-        return self.handlePossibleTag(True)
-
-    def handlePossibleTag(self, endTag):
-        data = self.data
-        if data.currentByte not in asciiLettersBytes:
-            # If the next byte is not an ascii letter either ignore this
-            # fragment (possible start tag case) or treat it according to
-            # handleOther
-            if endTag:
-                data.previous()
-                self.handleOther()
-            return True
-
-        c = data.skipUntil(spacesAngleBrackets)
-        if c == b"<":
-            # return to the first step in the overall "two step" algorithm
-            # reprocessing the < byte
-            data.previous()
-        else:
-            # Read all attributes
-            attr = self.getAttribute()
-            while attr is not None:
-                attr = self.getAttribute()
-        return True
-
-    def handleOther(self):
-        return self.data.jumpTo(b">")
-
-    def getAttribute(self):
-        """Return a name,value pair for the next attribute in the stream,
-        if one is found, or None"""
-        data = self.data
-        # Step 1 (skip chars)
-        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
-        assert c is None or len(c) == 1
-        # Step 2
-        if c in (b">", None):
-            return None
-        # Step 3
-        attrName = []
-        attrValue = []
-        # Step 4 attribute name
-        while True:
-            if c == b"=" and attrName:
-                break
-            elif c in spaceCharactersBytes:
-                # Step 6!
-                c = data.skip()
-                break
-            elif c in (b"/", b">"):
-                return b"".join(attrName), b""
-            elif c in asciiUppercaseBytes:
-                attrName.append(c.lower())
-            elif c is None:
-                return None
-            else:
-                attrName.append(c)
-            # Step 5
-            c = next(data)
-        # Step 7
-        if c != b"=":
-            data.previous()
-            return b"".join(attrName), b""
-        # Step 8
-        next(data)
-        # Step 9
-        c = data.skip()
-        # Step 10
-        if c in (b"'", b'"'):
-            # 10.1
-            quoteChar = c
-            while True:
-                # 10.2
-                c = next(data)
-                # 10.3
-                if c == quoteChar:
-                    next(data)
-                    return b"".join(attrName), b"".join(attrValue)
-                # 10.4
-                elif c in asciiUppercaseBytes:
-                    attrValue.append(c.lower())
-                # 10.5
-                else:
-                    attrValue.append(c)
-        elif c == b">":
-            return b"".join(attrName), b""
-        elif c in asciiUppercaseBytes:
-            attrValue.append(c.lower())
-        elif c is None:
-            return None
-        else:
-            attrValue.append(c)
-        # Step 11
-        while True:
-            c = next(data)
-            if c in spacesAngleBrackets:
-                return b"".join(attrName), b"".join(attrValue)
-            elif c in asciiUppercaseBytes:
-                attrValue.append(c.lower())
-            elif c is None:
-                return None
-            else:
-                attrValue.append(c)
-
-
-class ContentAttrParser(object):
-    def __init__(self, data):
-        assert isinstance(data, bytes)
-        self.data = data
-
-    def parse(self):
-        try:
-            # Check if the attr name is charset
-            # otherwise return
-            self.data.jumpTo(b"charset")
-            self.data.position += 1
-            self.data.skip()
-            if not self.data.currentByte == b"=":
-                # If there is no = sign keep looking for attrs
-                return None
-            self.data.position += 1
-            self.data.skip()
-            # Look for an encoding between matching quote marks
-            if self.data.currentByte in (b'"', b"'"):
-                quoteMark = self.data.currentByte
-                self.data.position += 1
-                oldPosition = self.data.position
-                if self.data.jumpTo(quoteMark):
-                    return self.data[oldPosition:self.data.position]
-                else:
-                    return None
-            else:
-                # Unquoted value
-                oldPosition = self.data.position
-                try:
-                    self.data.skipUntil(spaceCharactersBytes)
-                    return self.data[oldPosition:self.data.position]
-                except StopIteration:
-                    # Return the whole remaining value
-                    return self.data[oldPosition:]
-        except StopIteration:
-            return None
-
-
-def lookupEncoding(encoding):
-    """Return the python codec name corresponding to an encoding or None if the
-    string doesn't correspond to a valid encoding."""
-    if isinstance(encoding, bytes):
-        try:
-            encoding = encoding.decode("ascii")
-        except UnicodeDecodeError:
-            return None
-
-    if encoding is not None:
-        try:
-            return webencodings.lookup(encoding)
-        except AttributeError:
-            return None
-    else:
-        return None
diff --git a/src/pip/_vendor/html5lib/_tokenizer.py b/src/pip/_vendor/html5lib/_tokenizer.py
deleted file mode 100644
index 5f00253e2..000000000
--- a/src/pip/_vendor/html5lib/_tokenizer.py
+++ /dev/null
@@ -1,1735 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from pip._vendor.six import unichr as chr
-
-from collections import deque, OrderedDict
-from sys import version_info
-
-from .constants import spaceCharacters
-from .constants import entities
-from .constants import asciiLetters, asciiUpper2Lower
-from .constants import digits, hexDigits, EOF
-from .constants import tokenTypes, tagTokenTypes
-from .constants import replacementCharacters
-
-from ._inputstream import HTMLInputStream
-
-from ._trie import Trie
-
-entitiesTrie = Trie(entities)
-
-if version_info >= (3, 7):
-    attributeMap = dict
-else:
-    attributeMap = OrderedDict
-
-
-class HTMLTokenizer(object):
-    """ This class takes care of tokenizing HTML.
-
-    * self.currentToken
-      Holds the token that is currently being processed.
-
-    * self.state
-      Holds a reference to the method to be invoked... XXX
-
-    * self.stream
-      Points to HTMLInputStream object.
-    """
-
-    def __init__(self, stream, parser=None, **kwargs):
-
-        self.stream = HTMLInputStream(stream, **kwargs)
-        self.parser = parser
-
-        # Setup the initial tokenizer state
-        self.escapeFlag = False
-        self.lastFourChars = []
-        self.state = self.dataState
-        self.escape = False
-
-        # The current token being created
-        self.currentToken = None
-        super(HTMLTokenizer, self).__init__()
-
-    def __iter__(self):
-        """ This is where the magic happens.
-
-        We do our usually processing through the states and when we have a token
-        to return we yield the token which pauses processing until the next token
-        is requested.
-        """
-        self.tokenQueue = deque([])
-        # Start processing. When EOF is reached self.state will return False
-        # instead of True and the loop will terminate.
-        while self.state():
-            while self.stream.errors:
-                yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)}
-            while self.tokenQueue:
-                yield self.tokenQueue.popleft()
-
-    def consumeNumberEntity(self, isHex):
-        """This function returns either U+FFFD or the character based on the
-        decimal or hexadecimal representation. It also discards ";" if present.
-        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
-        """
-
-        allowed = digits
-        radix = 10
-        if isHex:
-            allowed = hexDigits
-            radix = 16
-
-        charStack = []
-
-        # Consume all the characters that are in range while making sure we
-        # don't hit an EOF.
-        c = self.stream.char()
-        while c in allowed and c is not EOF:
-            charStack.append(c)
-            c = self.stream.char()
-
-        # Convert the set of characters consumed to an int.
-        charAsInt = int("".join(charStack), radix)
-
-        # Certain characters get replaced with others
-        if charAsInt in replacementCharacters:
-            char = replacementCharacters[charAsInt]
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "illegal-codepoint-for-numeric-entity",
-                                    "datavars": {"charAsInt": charAsInt}})
-        elif ((0xD800 <= charAsInt <= 0xDFFF) or
-              (charAsInt > 0x10FFFF)):
-            char = "\uFFFD"
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "illegal-codepoint-for-numeric-entity",
-                                    "datavars": {"charAsInt": charAsInt}})
-        else:
-            # Should speed up this check somehow (e.g. move the set to a constant)
-            if ((0x0001 <= charAsInt <= 0x0008) or
-                (0x000E <= charAsInt <= 0x001F) or
-                (0x007F <= charAsInt <= 0x009F) or
-                (0xFDD0 <= charAsInt <= 0xFDEF) or
-                charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
-                                        0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
-                                        0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
-                                        0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
-                                        0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
-                                        0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
-                                        0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
-                                        0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
-                                        0xFFFFF, 0x10FFFE, 0x10FFFF])):
-                self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                        "data":
-                                        "illegal-codepoint-for-numeric-entity",
-                                        "datavars": {"charAsInt": charAsInt}})
-            try:
-                # Try/except needed as UCS-2 Python builds' unichar only works
-                # within the BMP.
-                char = chr(charAsInt)
-            except ValueError:
-                v = charAsInt - 0x10000
-                char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF))
-
-        # Discard the ; if present. Otherwise, put it back on the queue and
-        # invoke parseError on parser.
-        if c != ";":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "numeric-entity-without-semicolon"})
-            self.stream.unget(c)
-
-        return char
-
-    def consumeEntity(self, allowedChar=None, fromAttribute=False):
-        # Initialise to the default output for when no entity is matched
-        output = "&"
-
-        charStack = [self.stream.char()]
-        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
-                (allowedChar is not None and allowedChar == charStack[0])):
-            self.stream.unget(charStack[0])
-
-        elif charStack[0] == "#":
-            # Read the next character to see if it's hex or decimal
-            hex = False
-            charStack.append(self.stream.char())
-            if charStack[-1] in ("x", "X"):
-                hex = True
-                charStack.append(self.stream.char())
-
-            # charStack[-1] should be the first digit
-            if (hex and charStack[-1] in hexDigits) \
-                    or (not hex and charStack[-1] in digits):
-                # At least one digit found, so consume the whole number
-                self.stream.unget(charStack[-1])
-                output = self.consumeNumberEntity(hex)
-            else:
-                # No digits found
-                self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                        "data": "expected-numeric-entity"})
-                self.stream.unget(charStack.pop())
-                output = "&" + "".join(charStack)
-
-        else:
-            # At this point in the process might have named entity. Entities
-            # are stored in the global variable "entities".
-            #
-            # Consume characters and compare to these to a substring of the
-            # entity names in the list until the substring no longer matches.
-            while (charStack[-1] is not EOF):
-                if not entitiesTrie.has_keys_with_prefix("".join(charStack)):
-                    break
-                charStack.append(self.stream.char())
-
-            # At this point we have a string that starts with some characters
-            # that may match an entity
-            # Try to find the longest entity the string will match to take care
-            # of &noti for instance.
-            try:
-                entityName = entitiesTrie.longest_prefix("".join(charStack[:-1]))
-                entityLength = len(entityName)
-            except KeyError:
-                entityName = None
-
-            if entityName is not None:
-                if entityName[-1] != ";":
-                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                            "named-entity-without-semicolon"})
-                if (entityName[-1] != ";" and fromAttribute and
-                    (charStack[entityLength] in asciiLetters or
-                     charStack[entityLength] in digits or
-                     charStack[entityLength] == "=")):
-                    self.stream.unget(charStack.pop())
-                    output = "&" + "".join(charStack)
-                else:
-                    output = entities[entityName]
-                    self.stream.unget(charStack.pop())
-                    output += "".join(charStack[entityLength:])
-            else:
-                self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                        "expected-named-entity"})
-                self.stream.unget(charStack.pop())
-                output = "&" + "".join(charStack)
-
-        if fromAttribute:
-            self.currentToken["data"][-1][1] += output
-        else:
-            if output in spaceCharacters:
-                tokenType = "SpaceCharacters"
-            else:
-                tokenType = "Characters"
-            self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
-
-    def processEntityInAttribute(self, allowedChar):
-        """This method replaces the need for "entityInAttributeValueState".
-        """
-        self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)
-
-    def emitCurrentToken(self):
-        """This method is a generic handler for emitting the tags. It also sets
-        the state to "data" because that's what's needed after a token has been
-        emitted.
-        """
-        token = self.currentToken
-        # Add token to the queue to be yielded
-        if (token["type"] in tagTokenTypes):
-            token["name"] = token["name"].translate(asciiUpper2Lower)
-            if token["type"] == tokenTypes["StartTag"]:
-                raw = token["data"]
-                data = attributeMap(raw)
-                if len(raw) > len(data):
-                    # we had some duplicated attribute, fix so first wins
-                    data.update(raw[::-1])
-                token["data"] = data
-
-            if token["type"] == tokenTypes["EndTag"]:
-                if token["data"]:
-                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                            "data": "attributes-in-end-tag"})
-                if token["selfClosing"]:
-                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                            "data": "self-closing-flag-on-end-tag"})
-        self.tokenQueue.append(token)
-        self.state = self.dataState
-
-    # Below are the various tokenizer states worked out.
-    def dataState(self):
-        data = self.stream.char()
-        if data == "&":
-            self.state = self.entityDataState
-        elif data == "<":
-            self.state = self.tagOpenState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\u0000"})
-        elif data is EOF:
-            # Tokenization ends.
-            return False
-        elif data in spaceCharacters:
-            # Directly after emitting a token you switch back to the "data
-            # state". At that point spaceCharacters are important so they are
-            # emitted separately.
-            self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
-                                    data + self.stream.charsUntil(spaceCharacters, True)})
-            # No need to update lastFourChars here, since the first space will
-            # have already been appended to lastFourChars and will have broken
-            # any <!-- or --> sequences
-        else:
-            chars = self.stream.charsUntil(("&", "<", "\u0000"))
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-                                    data + chars})
-        return True
-
-    def entityDataState(self):
-        self.consumeEntity()
-        self.state = self.dataState
-        return True
-
-    def rcdataState(self):
-        data = self.stream.char()
-        if data == "&":
-            self.state = self.characterReferenceInRcdata
-        elif data == "<":
-            self.state = self.rcdataLessThanSignState
-        elif data == EOF:
-            # Tokenization ends.
-            return False
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-        elif data in spaceCharacters:
-            # Directly after emitting a token you switch back to the "data
-            # state". At that point spaceCharacters are important so they are
-            # emitted separately.
-            self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
-                                    data + self.stream.charsUntil(spaceCharacters, True)})
-            # No need to update lastFourChars here, since the first space will
-            # have already been appended to lastFourChars and will have broken
-            # any <!-- or --> sequences
-        else:
-            chars = self.stream.charsUntil(("&", "<", "\u0000"))
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-                                    data + chars})
-        return True
-
-    def characterReferenceInRcdata(self):
-        self.consumeEntity()
-        self.state = self.rcdataState
-        return True
-
-    def rawtextState(self):
-        data = self.stream.char()
-        if data == "<":
-            self.state = self.rawtextLessThanSignState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-        elif data == EOF:
-            # Tokenization ends.
-            return False
-        else:
-            chars = self.stream.charsUntil(("<", "\u0000"))
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-                                    data + chars})
-        return True
-
-    def scriptDataState(self):
-        data = self.stream.char()
-        if data == "<":
-            self.state = self.scriptDataLessThanSignState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-        elif data == EOF:
-            # Tokenization ends.
-            return False
-        else:
-            chars = self.stream.charsUntil(("<", "\u0000"))
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-                                    data + chars})
-        return True
-
-    def plaintextState(self):
-        data = self.stream.char()
-        if data == EOF:
-            # Tokenization ends.
-            return False
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-                                    data + self.stream.charsUntil("\u0000")})
-        return True
-
-    def tagOpenState(self):
-        data = self.stream.char()
-        if data == "!":
-            self.state = self.markupDeclarationOpenState
-        elif data == "/":
-            self.state = self.closeTagOpenState
-        elif data in asciiLetters:
-            self.currentToken = {"type": tokenTypes["StartTag"],
-                                 "name": data, "data": [],
-                                 "selfClosing": False,
-                                 "selfClosingAcknowledged": False}
-            self.state = self.tagNameState
-        elif data == ">":
-            # XXX In theory it could be something besides a tag name. But
-            # do we really care?
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-tag-name-but-got-right-bracket"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"})
-            self.state = self.dataState
-        elif data == "?":
-            # XXX In theory it could be something besides a tag name. But
-            # do we really care?
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-tag-name-but-got-question-mark"})
-            self.stream.unget(data)
-            self.state = self.bogusCommentState
-        else:
-            # XXX
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-tag-name"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.stream.unget(data)
-            self.state = self.dataState
-        return True
-
-    def closeTagOpenState(self):
-        data = self.stream.char()
-        if data in asciiLetters:
-            self.currentToken = {"type": tokenTypes["EndTag"], "name": data,
-                                 "data": [], "selfClosing": False}
-            self.state = self.tagNameState
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-closing-tag-but-got-right-bracket"})
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-closing-tag-but-got-eof"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
-            self.state = self.dataState
-        else:
-            # XXX data can be _'_...
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-closing-tag-but-got-char",
-                                    "datavars": {"data": data}})
-            self.stream.unget(data)
-            self.state = self.bogusCommentState
-        return True
-
-    def tagNameState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.state = self.beforeAttributeNameState
-        elif data == ">":
-            self.emitCurrentToken()
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-tag-name"})
-            self.state = self.dataState
-        elif data == "/":
-            self.state = self.selfClosingStartTagState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["name"] += "\uFFFD"
-        else:
-            self.currentToken["name"] += data
-            # (Don't use charsUntil here, because tag names are
-            # very short and it's faster to not do anything fancy)
-        return True
-
-    def rcdataLessThanSignState(self):
-        data = self.stream.char()
-        if data == "/":
-            self.temporaryBuffer = ""
-            self.state = self.rcdataEndTagOpenState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.stream.unget(data)
-            self.state = self.rcdataState
-        return True
-
-    def rcdataEndTagOpenState(self):
-        data = self.stream.char()
-        if data in asciiLetters:
-            self.temporaryBuffer += data
-            self.state = self.rcdataEndTagNameState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
-            self.stream.unget(data)
-            self.state = self.rcdataState
-        return True
-
-    def rcdataEndTagNameState(self):
-        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
-        data = self.stream.char()
-        if data in spaceCharacters and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.beforeAttributeNameState
-        elif data == "/" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.selfClosingStartTagState
-        elif data == ">" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.emitCurrentToken()
-            self.state = self.dataState
-        elif data in asciiLetters:
-            self.temporaryBuffer += data
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "</" + self.temporaryBuffer})
-            self.stream.unget(data)
-            self.state = self.rcdataState
-        return True
-
-    def rawtextLessThanSignState(self):
-        data = self.stream.char()
-        if data == "/":
-            self.temporaryBuffer = ""
-            self.state = self.rawtextEndTagOpenState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.stream.unget(data)
-            self.state = self.rawtextState
-        return True
-
-    def rawtextEndTagOpenState(self):
-        data = self.stream.char()
-        if data in asciiLetters:
-            self.temporaryBuffer += data
-            self.state = self.rawtextEndTagNameState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
-            self.stream.unget(data)
-            self.state = self.rawtextState
-        return True
-
-    def rawtextEndTagNameState(self):
-        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
-        data = self.stream.char()
-        if data in spaceCharacters and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.beforeAttributeNameState
-        elif data == "/" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.selfClosingStartTagState
-        elif data == ">" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.emitCurrentToken()
-            self.state = self.dataState
-        elif data in asciiLetters:
-            self.temporaryBuffer += data
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "</" + self.temporaryBuffer})
-            self.stream.unget(data)
-            self.state = self.rawtextState
-        return True
-
-    def scriptDataLessThanSignState(self):
-        data = self.stream.char()
-        if data == "/":
-            self.temporaryBuffer = ""
-            self.state = self.scriptDataEndTagOpenState
-        elif data == "!":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"})
-            self.state = self.scriptDataEscapeStartState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.stream.unget(data)
-            self.state = self.scriptDataState
-        return True
-
-    def scriptDataEndTagOpenState(self):
-        data = self.stream.char()
-        if data in asciiLetters:
-            self.temporaryBuffer += data
-            self.state = self.scriptDataEndTagNameState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
-            self.stream.unget(data)
-            self.state = self.scriptDataState
-        return True
-
-    def scriptDataEndTagNameState(self):
-        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
-        data = self.stream.char()
-        if data in spaceCharacters and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.beforeAttributeNameState
-        elif data == "/" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.selfClosingStartTagState
-        elif data == ">" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.emitCurrentToken()
-            self.state = self.dataState
-        elif data in asciiLetters:
-            self.temporaryBuffer += data
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "</" + self.temporaryBuffer})
-            self.stream.unget(data)
-            self.state = self.scriptDataState
-        return True
-
-    def scriptDataEscapeStartState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-            self.state = self.scriptDataEscapeStartDashState
-        else:
-            self.stream.unget(data)
-            self.state = self.scriptDataState
-        return True
-
-    def scriptDataEscapeStartDashState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-            self.state = self.scriptDataEscapedDashDashState
-        else:
-            self.stream.unget(data)
-            self.state = self.scriptDataState
-        return True
-
-    def scriptDataEscapedState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-            self.state = self.scriptDataEscapedDashState
-        elif data == "<":
-            self.state = self.scriptDataEscapedLessThanSignState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-        elif data == EOF:
-            self.state = self.dataState
-        else:
-            chars = self.stream.charsUntil(("<", "-", "\u0000"))
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-                                    data + chars})
-        return True
-
-    def scriptDataEscapedDashState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-            self.state = self.scriptDataEscapedDashDashState
-        elif data == "<":
-            self.state = self.scriptDataEscapedLessThanSignState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-            self.state = self.scriptDataEscapedState
-        elif data == EOF:
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            self.state = self.scriptDataEscapedState
-        return True
-
-    def scriptDataEscapedDashDashState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-        elif data == "<":
-            self.state = self.scriptDataEscapedLessThanSignState
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
-            self.state = self.scriptDataState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-            self.state = self.scriptDataEscapedState
-        elif data == EOF:
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            self.state = self.scriptDataEscapedState
-        return True
-
-    def scriptDataEscapedLessThanSignState(self):
-        data = self.stream.char()
-        if data == "/":
-            self.temporaryBuffer = ""
-            self.state = self.scriptDataEscapedEndTagOpenState
-        elif data in asciiLetters:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data})
-            self.temporaryBuffer = data
-            self.state = self.scriptDataDoubleEscapeStartState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.stream.unget(data)
-            self.state = self.scriptDataEscapedState
-        return True
-
-    def scriptDataEscapedEndTagOpenState(self):
-        data = self.stream.char()
-        if data in asciiLetters:
-            self.temporaryBuffer = data
-            self.state = self.scriptDataEscapedEndTagNameState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
-            self.stream.unget(data)
-            self.state = self.scriptDataEscapedState
-        return True
-
-    def scriptDataEscapedEndTagNameState(self):
-        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
-        data = self.stream.char()
-        if data in spaceCharacters and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.beforeAttributeNameState
-        elif data == "/" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.state = self.selfClosingStartTagState
-        elif data == ">" and appropriate:
-            self.currentToken = {"type": tokenTypes["EndTag"],
-                                 "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing": False}
-            self.emitCurrentToken()
-            self.state = self.dataState
-        elif data in asciiLetters:
-            self.temporaryBuffer += data
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "</" + self.temporaryBuffer})
-            self.stream.unget(data)
-            self.state = self.scriptDataEscapedState
-        return True
-
-    def scriptDataDoubleEscapeStartState(self):
-        data = self.stream.char()
-        if data in (spaceCharacters | frozenset(("/", ">"))):
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            if self.temporaryBuffer.lower() == "script":
-                self.state = self.scriptDataDoubleEscapedState
-            else:
-                self.state = self.scriptDataEscapedState
-        elif data in asciiLetters:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            self.temporaryBuffer += data
-        else:
-            self.stream.unget(data)
-            self.state = self.scriptDataEscapedState
-        return True
-
-    def scriptDataDoubleEscapedState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-            self.state = self.scriptDataDoubleEscapedDashState
-        elif data == "<":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.state = self.scriptDataDoubleEscapedLessThanSignState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-        elif data == EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-script-in-script"})
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-        return True
-
-    def scriptDataDoubleEscapedDashState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-            self.state = self.scriptDataDoubleEscapedDashDashState
-        elif data == "<":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.state = self.scriptDataDoubleEscapedLessThanSignState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-            self.state = self.scriptDataDoubleEscapedState
-        elif data == EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-script-in-script"})
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            self.state = self.scriptDataDoubleEscapedState
-        return True
-
-    def scriptDataDoubleEscapedDashDashState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
-        elif data == "<":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
-            self.state = self.scriptDataDoubleEscapedLessThanSignState
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
-            self.state = self.scriptDataState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": "\uFFFD"})
-            self.state = self.scriptDataDoubleEscapedState
-        elif data == EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-script-in-script"})
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            self.state = self.scriptDataDoubleEscapedState
-        return True
-
-    def scriptDataDoubleEscapedLessThanSignState(self):
-        data = self.stream.char()
-        if data == "/":
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"})
-            self.temporaryBuffer = ""
-            self.state = self.scriptDataDoubleEscapeEndState
-        else:
-            self.stream.unget(data)
-            self.state = self.scriptDataDoubleEscapedState
-        return True
-
-    def scriptDataDoubleEscapeEndState(self):
-        data = self.stream.char()
-        if data in (spaceCharacters | frozenset(("/", ">"))):
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            if self.temporaryBuffer.lower() == "script":
-                self.state = self.scriptDataEscapedState
-            else:
-                self.state = self.scriptDataDoubleEscapedState
-        elif data in asciiLetters:
-            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
-            self.temporaryBuffer += data
-        else:
-            self.stream.unget(data)
-            self.state = self.scriptDataDoubleEscapedState
-        return True
-
-    def beforeAttributeNameState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.stream.charsUntil(spaceCharacters, True)
-        elif data in asciiLetters:
-            self.currentToken["data"].append([data, ""])
-            self.state = self.attributeNameState
-        elif data == ">":
-            self.emitCurrentToken()
-        elif data == "/":
-            self.state = self.selfClosingStartTagState
-        elif data in ("'", '"', "=", "<"):
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "invalid-character-in-attribute-name"})
-            self.currentToken["data"].append([data, ""])
-            self.state = self.attributeNameState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"].append(["\uFFFD", ""])
-            self.state = self.attributeNameState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-attribute-name-but-got-eof"})
-            self.state = self.dataState
-        else:
-            self.currentToken["data"].append([data, ""])
-            self.state = self.attributeNameState
-        return True
-
-    def attributeNameState(self):
-        data = self.stream.char()
-        leavingThisState = True
-        emitToken = False
-        if data == "=":
-            self.state = self.beforeAttributeValueState
-        elif data in asciiLetters:
-            self.currentToken["data"][-1][0] += data +\
-                self.stream.charsUntil(asciiLetters, True)
-            leavingThisState = False
-        elif data == ">":
-            # XXX If we emit here the attributes are converted to a dict
-            # without being checked and when the code below runs we error
-            # because data is a dict not a list
-            emitToken = True
-        elif data in spaceCharacters:
-            self.state = self.afterAttributeNameState
-        elif data == "/":
-            self.state = self.selfClosingStartTagState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"][-1][0] += "\uFFFD"
-            leavingThisState = False
-        elif data in ("'", '"', "<"):
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data":
-                                    "invalid-character-in-attribute-name"})
-            self.currentToken["data"][-1][0] += data
-            leavingThisState = False
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "eof-in-attribute-name"})
-            self.state = self.dataState
-        else:
-            self.currentToken["data"][-1][0] += data
-            leavingThisState = False
-
-        if leavingThisState:
-            # Attributes are not dropped at this stage. That happens when the
-            # start tag token is emitted so values can still be safely appended
-            # to attributes, but we do want to report the parse error in time.
-            self.currentToken["data"][-1][0] = (
-                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
-            for name, _ in self.currentToken["data"][:-1]:
-                if self.currentToken["data"][-1][0] == name:
-                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                            "duplicate-attribute"})
-                    break
-            # XXX Fix for above XXX
-            if emitToken:
-                self.emitCurrentToken()
-        return True
-
-    def afterAttributeNameState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.stream.charsUntil(spaceCharacters, True)
-        elif data == "=":
-            self.state = self.beforeAttributeValueState
-        elif data == ">":
-            self.emitCurrentToken()
-        elif data in asciiLetters:
-            self.currentToken["data"].append([data, ""])
-            self.state = self.attributeNameState
-        elif data == "/":
-            self.state = self.selfClosingStartTagState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"].append(["\uFFFD", ""])
-            self.state = self.attributeNameState
-        elif data in ("'", '"', "<"):
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "invalid-character-after-attribute-name"})
-            self.currentToken["data"].append([data, ""])
-            self.state = self.attributeNameState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-end-of-tag-but-got-eof"})
-            self.state = self.dataState
-        else:
-            self.currentToken["data"].append([data, ""])
-            self.state = self.attributeNameState
-        return True
-
-    def beforeAttributeValueState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.stream.charsUntil(spaceCharacters, True)
-        elif data == "\"":
-            self.state = self.attributeValueDoubleQuotedState
-        elif data == "&":
-            self.state = self.attributeValueUnQuotedState
-            self.stream.unget(data)
-        elif data == "'":
-            self.state = self.attributeValueSingleQuotedState
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-attribute-value-but-got-right-bracket"})
-            self.emitCurrentToken()
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"][-1][1] += "\uFFFD"
-            self.state = self.attributeValueUnQuotedState
-        elif data in ("=", "<", "`"):
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "equals-in-unquoted-attribute-value"})
-            self.currentToken["data"][-1][1] += data
-            self.state = self.attributeValueUnQuotedState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-attribute-value-but-got-eof"})
-            self.state = self.dataState
-        else:
-            self.currentToken["data"][-1][1] += data
-            self.state = self.attributeValueUnQuotedState
-        return True
-
-    def attributeValueDoubleQuotedState(self):
-        data = self.stream.char()
-        if data == "\"":
-            self.state = self.afterAttributeValueState
-        elif data == "&":
-            self.processEntityInAttribute('"')
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"][-1][1] += "\uFFFD"
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-attribute-value-double-quote"})
-            self.state = self.dataState
-        else:
-            self.currentToken["data"][-1][1] += data +\
-                self.stream.charsUntil(("\"", "&", "\u0000"))
-        return True
-
-    def attributeValueSingleQuotedState(self):
-        data = self.stream.char()
-        if data == "'":
-            self.state = self.afterAttributeValueState
-        elif data == "&":
-            self.processEntityInAttribute("'")
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"][-1][1] += "\uFFFD"
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-attribute-value-single-quote"})
-            self.state = self.dataState
-        else:
-            self.currentToken["data"][-1][1] += data +\
-                self.stream.charsUntil(("'", "&", "\u0000"))
-        return True
-
-    def attributeValueUnQuotedState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.state = self.beforeAttributeNameState
-        elif data == "&":
-            self.processEntityInAttribute(">")
-        elif data == ">":
-            self.emitCurrentToken()
-        elif data in ('"', "'", "=", "<", "`"):
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-character-in-unquoted-attribute-value"})
-            self.currentToken["data"][-1][1] += data
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"][-1][1] += "\uFFFD"
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-attribute-value-no-quotes"})
-            self.state = self.dataState
-        else:
-            self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
-                frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters)
-        return True
-
-    def afterAttributeValueState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.state = self.beforeAttributeNameState
-        elif data == ">":
-            self.emitCurrentToken()
-        elif data == "/":
-            self.state = self.selfClosingStartTagState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-EOF-after-attribute-value"})
-            self.stream.unget(data)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-character-after-attribute-value"})
-            self.stream.unget(data)
-            self.state = self.beforeAttributeNameState
-        return True
-
-    def selfClosingStartTagState(self):
-        data = self.stream.char()
-        if data == ">":
-            self.currentToken["selfClosing"] = True
-            self.emitCurrentToken()
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data":
-                                    "unexpected-EOF-after-solidus-in-tag"})
-            self.stream.unget(data)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-character-after-solidus-in-tag"})
-            self.stream.unget(data)
-            self.state = self.beforeAttributeNameState
-        return True
-
-    def bogusCommentState(self):
-        # Make a new comment token and give it as value all the characters
-        # until the first > or EOF (charsUntil checks for EOF automatically)
-        # and emit it.
-        data = self.stream.charsUntil(">")
-        data = data.replace("\u0000", "\uFFFD")
-        self.tokenQueue.append(
-            {"type": tokenTypes["Comment"], "data": data})
-
-        # Eat the character directly after the bogus comment which is either a
-        # ">" or an EOF.
-        self.stream.char()
-        self.state = self.dataState
-        return True
-
-    def markupDeclarationOpenState(self):
-        charStack = [self.stream.char()]
-        if charStack[-1] == "-":
-            charStack.append(self.stream.char())
-            if charStack[-1] == "-":
-                self.currentToken = {"type": tokenTypes["Comment"], "data": ""}
-                self.state = self.commentStartState
-                return True
-        elif charStack[-1] in ('d', 'D'):
-            matched = True
-            for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'),
-                             ('y', 'Y'), ('p', 'P'), ('e', 'E')):
-                charStack.append(self.stream.char())
-                if charStack[-1] not in expected:
-                    matched = False
-                    break
-            if matched:
-                self.currentToken = {"type": tokenTypes["Doctype"],
-                                     "name": "",
-                                     "publicId": None, "systemId": None,
-                                     "correct": True}
-                self.state = self.doctypeState
-                return True
-        elif (charStack[-1] == "[" and
-              self.parser is not None and
-              self.parser.tree.openElements and
-              self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
-            matched = True
-            for expected in ["C", "D", "A", "T", "A", "["]:
-                charStack.append(self.stream.char())
-                if charStack[-1] != expected:
-                    matched = False
-                    break
-            if matched:
-                self.state = self.cdataSectionState
-                return True
-
-        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                "expected-dashes-or-doctype"})
-
-        while charStack:
-            self.stream.unget(charStack.pop())
-        self.state = self.bogusCommentState
-        return True
-
-    def commentStartState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.state = self.commentStartDashState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"] += "\uFFFD"
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "incorrect-comment"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-comment"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["data"] += data
-            self.state = self.commentState
-        return True
-
-    def commentStartDashState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.state = self.commentEndState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"] += "-\uFFFD"
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "incorrect-comment"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-comment"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["data"] += "-" + data
-            self.state = self.commentState
-        return True
-
-    def commentState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.state = self.commentEndDashState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"] += "\uFFFD"
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "eof-in-comment"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["data"] += data + \
-                self.stream.charsUntil(("-", "\u0000"))
-        return True
-
-    def commentEndDashState(self):
-        data = self.stream.char()
-        if data == "-":
-            self.state = self.commentEndState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"] += "-\uFFFD"
-            self.state = self.commentState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-comment-end-dash"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["data"] += "-" + data
-            self.state = self.commentState
-        return True
-
-    def commentEndState(self):
-        data = self.stream.char()
-        if data == ">":
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"] += "--\uFFFD"
-            self.state = self.commentState
-        elif data == "!":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-bang-after-double-dash-in-comment"})
-            self.state = self.commentEndBangState
-        elif data == "-":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-dash-after-double-dash-in-comment"})
-            self.currentToken["data"] += data
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-comment-double-dash"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            # XXX
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-comment"})
-            self.currentToken["data"] += "--" + data
-            self.state = self.commentState
-        return True
-
-    def commentEndBangState(self):
-        data = self.stream.char()
-        if data == ">":
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data == "-":
-            self.currentToken["data"] += "--!"
-            self.state = self.commentEndDashState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["data"] += "--!\uFFFD"
-            self.state = self.commentState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-comment-end-bang-state"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["data"] += "--!" + data
-            self.state = self.commentState
-        return True
-
-    def doctypeState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.state = self.beforeDoctypeNameState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-doctype-name-but-got-eof"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "need-space-after-doctype"})
-            self.stream.unget(data)
-            self.state = self.beforeDoctypeNameState
-        return True
-
-    def beforeDoctypeNameState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            pass
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-doctype-name-but-got-right-bracket"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["name"] = "\uFFFD"
-            self.state = self.doctypeNameState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-doctype-name-but-got-eof"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["name"] = data
-            self.state = self.doctypeNameState
-        return True
-
-    def doctypeNameState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
-            self.state = self.afterDoctypeNameState
-        elif data == ">":
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["name"] += "\uFFFD"
-            self.state = self.doctypeNameState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype-name"})
-            self.currentToken["correct"] = False
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["name"] += data
-        return True
-
-    def afterDoctypeNameState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            pass
-        elif data == ">":
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.currentToken["correct"] = False
-            self.stream.unget(data)
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            if data in ("p", "P"):
-                matched = True
-                for expected in (("u", "U"), ("b", "B"), ("l", "L"),
-                                 ("i", "I"), ("c", "C")):
-                    data = self.stream.char()
-                    if data not in expected:
-                        matched = False
-                        break
-                if matched:
-                    self.state = self.afterDoctypePublicKeywordState
-                    return True
-            elif data in ("s", "S"):
-                matched = True
-                for expected in (("y", "Y"), ("s", "S"), ("t", "T"),
-                                 ("e", "E"), ("m", "M")):
-                    data = self.stream.char()
-                    if data not in expected:
-                        matched = False
-                        break
-                if matched:
-                    self.state = self.afterDoctypeSystemKeywordState
-                    return True
-
-            # All the characters read before the current 'data' will be
-            # [a-zA-Z], so they're garbage in the bogus doctype and can be
-            # discarded; only the latest character might be '>' or EOF
-            # and needs to be ungetted
-            self.stream.unget(data)
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "expected-space-or-right-bracket-in-doctype", "datavars":
-                                    {"data": data}})
-            self.currentToken["correct"] = False
-            self.state = self.bogusDoctypeState
-
-        return True
-
-    def afterDoctypePublicKeywordState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.state = self.beforeDoctypePublicIdentifierState
-        elif data in ("'", '"'):
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.stream.unget(data)
-            self.state = self.beforeDoctypePublicIdentifierState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.stream.unget(data)
-            self.state = self.beforeDoctypePublicIdentifierState
-        return True
-
-    def beforeDoctypePublicIdentifierState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            pass
-        elif data == "\"":
-            self.currentToken["publicId"] = ""
-            self.state = self.doctypePublicIdentifierDoubleQuotedState
-        elif data == "'":
-            self.currentToken["publicId"] = ""
-            self.state = self.doctypePublicIdentifierSingleQuotedState
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-end-of-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.currentToken["correct"] = False
-            self.state = self.bogusDoctypeState
-        return True
-
-    def doctypePublicIdentifierDoubleQuotedState(self):
-        data = self.stream.char()
-        if data == "\"":
-            self.state = self.afterDoctypePublicIdentifierState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["publicId"] += "\uFFFD"
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-end-of-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["publicId"] += data
-        return True
-
-    def doctypePublicIdentifierSingleQuotedState(self):
-        data = self.stream.char()
-        if data == "'":
-            self.state = self.afterDoctypePublicIdentifierState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["publicId"] += "\uFFFD"
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-end-of-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["publicId"] += data
-        return True
-
-    def afterDoctypePublicIdentifierState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.state = self.betweenDoctypePublicAndSystemIdentifiersState
-        elif data == ">":
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data == '"':
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.currentToken["systemId"] = ""
-            self.state = self.doctypeSystemIdentifierDoubleQuotedState
-        elif data == "'":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.currentToken["systemId"] = ""
-            self.state = self.doctypeSystemIdentifierSingleQuotedState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.currentToken["correct"] = False
-            self.state = self.bogusDoctypeState
-        return True
-
-    def betweenDoctypePublicAndSystemIdentifiersState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            pass
-        elif data == ">":
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data == '"':
-            self.currentToken["systemId"] = ""
-            self.state = self.doctypeSystemIdentifierDoubleQuotedState
-        elif data == "'":
-            self.currentToken["systemId"] = ""
-            self.state = self.doctypeSystemIdentifierSingleQuotedState
-        elif data == EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.currentToken["correct"] = False
-            self.state = self.bogusDoctypeState
-        return True
-
-    def afterDoctypeSystemKeywordState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            self.state = self.beforeDoctypeSystemIdentifierState
-        elif data in ("'", '"'):
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.stream.unget(data)
-            self.state = self.beforeDoctypeSystemIdentifierState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.stream.unget(data)
-            self.state = self.beforeDoctypeSystemIdentifierState
-        return True
-
-    def beforeDoctypeSystemIdentifierState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            pass
-        elif data == "\"":
-            self.currentToken["systemId"] = ""
-            self.state = self.doctypeSystemIdentifierDoubleQuotedState
-        elif data == "'":
-            self.currentToken["systemId"] = ""
-            self.state = self.doctypeSystemIdentifierSingleQuotedState
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.currentToken["correct"] = False
-            self.state = self.bogusDoctypeState
-        return True
-
-    def doctypeSystemIdentifierDoubleQuotedState(self):
-        data = self.stream.char()
-        if data == "\"":
-            self.state = self.afterDoctypeSystemIdentifierState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["systemId"] += "\uFFFD"
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-end-of-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["systemId"] += data
-        return True
-
-    def doctypeSystemIdentifierSingleQuotedState(self):
-        data = self.stream.char()
-        if data == "'":
-            self.state = self.afterDoctypeSystemIdentifierState
-        elif data == "\u0000":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data": "invalid-codepoint"})
-            self.currentToken["systemId"] += "\uFFFD"
-        elif data == ">":
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-end-of-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.currentToken["systemId"] += data
-        return True
-
-    def afterDoctypeSystemIdentifierState(self):
-        data = self.stream.char()
-        if data in spaceCharacters:
-            pass
-        elif data == ">":
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "eof-in-doctype"})
-            self.currentToken["correct"] = False
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                                    "unexpected-char-in-doctype"})
-            self.state = self.bogusDoctypeState
-        return True
-
-    def bogusDoctypeState(self):
-        data = self.stream.char()
-        if data == ">":
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        elif data is EOF:
-            # XXX EMIT
-            self.stream.unget(data)
-            self.tokenQueue.append(self.currentToken)
-            self.state = self.dataState
-        else:
-            pass
-        return True
-
-    def cdataSectionState(self):
-        data = []
-        while True:
-            data.append(self.stream.charsUntil("]"))
-            data.append(self.stream.charsUntil(">"))
-            char = self.stream.char()
-            if char == EOF:
-                break
-            else:
-                assert char == ">"
-                if data[-1][-2:] == "]]":
-                    data[-1] = data[-1][:-2]
-                    break
-                else:
-                    data.append(char)
-
-        data = "".join(data)  # pylint:disable=redefined-variable-type
-        # Deal with null here rather than in the parser
-        nullCount = data.count("\u0000")
-        if nullCount > 0:
-            for _ in range(nullCount):
-                self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                        "data": "invalid-codepoint"})
-            data = data.replace("\u0000", "\uFFFD")
-        if data:
-            self.tokenQueue.append({"type": tokenTypes["Characters"],
-                                    "data": data})
-        self.state = self.dataState
-        return True
diff --git a/src/pip/_vendor/html5lib/_trie/__init__.py b/src/pip/_vendor/html5lib/_trie/__init__.py
deleted file mode 100644
index 07bad5d31..000000000
--- a/src/pip/_vendor/html5lib/_trie/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from .py import Trie
-
-__all__ = ["Trie"]
diff --git a/src/pip/_vendor/html5lib/_trie/_base.py b/src/pip/_vendor/html5lib/_trie/_base.py
deleted file mode 100644
index 6b71975f0..000000000
--- a/src/pip/_vendor/html5lib/_trie/_base.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-try:
-    from collections.abc import Mapping
-except ImportError:  # Python 2.7
-    from collections import Mapping
-
-
-class Trie(Mapping):
-    """Abstract base class for tries"""
-
-    def keys(self, prefix=None):
-        # pylint:disable=arguments-differ
-        keys = super(Trie, self).keys()
-
-        if prefix is None:
-            return set(keys)
-
-        return {x for x in keys if x.startswith(prefix)}
-
-    def has_keys_with_prefix(self, prefix):
-        for key in self.keys():
-            if key.startswith(prefix):
-                return True
-
-        return False
-
-    def longest_prefix(self, prefix):
-        if prefix in self:
-            return prefix
-
-        for i in range(1, len(prefix) + 1):
-            if prefix[:-i] in self:
-                return prefix[:-i]
-
-        raise KeyError(prefix)
-
-    def longest_prefix_item(self, prefix):
-        lprefix = self.longest_prefix(prefix)
-        return (lprefix, self[lprefix])
diff --git a/src/pip/_vendor/html5lib/_trie/py.py b/src/pip/_vendor/html5lib/_trie/py.py
deleted file mode 100644
index c178b219d..000000000
--- a/src/pip/_vendor/html5lib/_trie/py.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-from bisect import bisect_left
-
-from ._base import Trie as ABCTrie
-
-
-class Trie(ABCTrie):
-    def __init__(self, data):
-        if not all(isinstance(x, text_type) for x in data.keys()):
-            raise TypeError("All keys must be strings")
-
-        self._data = data
-        self._keys = sorted(data.keys())
-        self._cachestr = ""
-        self._cachepoints = (0, len(data))
-
-    def __contains__(self, key):
-        return key in self._data
-
-    def __len__(self):
-        return len(self._data)
-
-    def __iter__(self):
-        return iter(self._data)
-
-    def __getitem__(self, key):
-        return self._data[key]
-
-    def keys(self, prefix=None):
-        if prefix is None or prefix == "" or not self._keys:
-            return set(self._keys)
-
-        if prefix.startswith(self._cachestr):
-            lo, hi = self._cachepoints
-            start = i = bisect_left(self._keys, prefix, lo, hi)
-        else:
-            start = i = bisect_left(self._keys, prefix)
-
-        keys = set()
-        if start == len(self._keys):
-            return keys
-
-        while self._keys[i].startswith(prefix):
-            keys.add(self._keys[i])
-            i += 1
-
-        self._cachestr = prefix
-        self._cachepoints = (start, i)
-
-        return keys
-
-    def has_keys_with_prefix(self, prefix):
-        if prefix in self._data:
-            return True
-
-        if prefix.startswith(self._cachestr):
-            lo, hi = self._cachepoints
-            i = bisect_left(self._keys, prefix, lo, hi)
-        else:
-            i = bisect_left(self._keys, prefix)
-
-        if i == len(self._keys):
-            return False
-
-        return self._keys[i].startswith(prefix)
diff --git a/src/pip/_vendor/html5lib/_utils.py b/src/pip/_vendor/html5lib/_utils.py
deleted file mode 100644
index d7c4926af..000000000
--- a/src/pip/_vendor/html5lib/_utils.py
+++ /dev/null
@@ -1,159 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from types import ModuleType
-
-try:
-    from collections.abc import Mapping
-except ImportError:
-    from collections import Mapping
-
-from pip._vendor.six import text_type, PY3
-
-if PY3:
-    import xml.etree.ElementTree as default_etree
-else:
-    try:
-        import xml.etree.cElementTree as default_etree
-    except ImportError:
-        import xml.etree.ElementTree as default_etree
-
-
-__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
-           "surrogatePairToCodepoint", "moduleFactoryFactory",
-           "supports_lone_surrogates"]
-
-
-# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
-# caught by the below test. In general this would be any platform
-# using UTF-16 as its encoding of unicode strings, such as
-# Jython. This is because UTF-16 itself is based on the use of such
-# surrogates, and there is no mechanism to further escape such
-# escapes.
-try:
-    _x = eval('"\\uD800"')  # pylint:disable=eval-used
-    if not isinstance(_x, text_type):
-        # We need this with u"" because of http://bugs.jython.org/issue2039
-        _x = eval('u"\\uD800"')  # pylint:disable=eval-used
-        assert isinstance(_x, text_type)
-except Exception:
-    supports_lone_surrogates = False
-else:
-    supports_lone_surrogates = True
-
-
-class MethodDispatcher(dict):
-    """Dict with 2 special properties:
-
-    On initiation, keys that are lists, sets or tuples are converted to
-    multiple keys so accessing any one of the items in the original
-    list-like object returns the matching value
-
-    md = MethodDispatcher({("foo", "bar"):"baz"})
-    md["foo"] == "baz"
-
-    A default value which can be set through the default attribute.
-    """
-
-    def __init__(self, items=()):
-        _dictEntries = []
-        for name, value in items:
-            if isinstance(name, (list, tuple, frozenset, set)):
-                for item in name:
-                    _dictEntries.append((item, value))
-            else:
-                _dictEntries.append((name, value))
-        dict.__init__(self, _dictEntries)
-        assert len(self) == len(_dictEntries)
-        self.default = None
-
-    def __getitem__(self, key):
-        return dict.get(self, key, self.default)
-
-    def __get__(self, instance, owner=None):
-        return BoundMethodDispatcher(instance, self)
-
-
-class BoundMethodDispatcher(Mapping):
-    """Wraps a MethodDispatcher, binding its return values to `instance`"""
-    def __init__(self, instance, dispatcher):
-        self.instance = instance
-        self.dispatcher = dispatcher
-
-    def __getitem__(self, key):
-        # see https://docs.python.org/3/reference/datamodel.html#object.__get__
-        # on a function, __get__ is used to bind a function to an instance as a bound method
-        return self.dispatcher[key].__get__(self.instance)
-
-    def get(self, key, default):
-        if key in self.dispatcher:
-            return self[key]
-        else:
-            return default
-
-    def __iter__(self):
-        return iter(self.dispatcher)
-
-    def __len__(self):
-        return len(self.dispatcher)
-
-    def __contains__(self, key):
-        return key in self.dispatcher
-
-
-# Some utility functions to deal with weirdness around UCS2 vs UCS4
-# python builds
-
-def isSurrogatePair(data):
-    return (len(data) == 2 and
-            ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
-            ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
-
-
-def surrogatePairToCodepoint(data):
-    char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
-                (ord(data[1]) - 0xDC00))
-    return char_val
-
-# Module Factory Factory (no, this isn't Java, I know)
-# Here to stop this being duplicated all over the place.
-
-
-def moduleFactoryFactory(factory):
-    moduleCache = {}
-
-    def moduleFactory(baseModule, *args, **kwargs):
-        if isinstance(ModuleType.__name__, type("")):
-            name = "_%s_factory" % baseModule.__name__
-        else:
-            name = b"_%s_factory" % baseModule.__name__
-
-        kwargs_tuple = tuple(kwargs.items())
-
-        try:
-            return moduleCache[name][args][kwargs_tuple]
-        except KeyError:
-            mod = ModuleType(name)
-            objs = factory(baseModule, *args, **kwargs)
-            mod.__dict__.update(objs)
-            if "name" not in moduleCache:
-                moduleCache[name] = {}
-            if "args" not in moduleCache[name]:
-                moduleCache[name][args] = {}
-            if "kwargs" not in moduleCache[name][args]:
-                moduleCache[name][args][kwargs_tuple] = {}
-            moduleCache[name][args][kwargs_tuple] = mod
-            return mod
-
-    return moduleFactory
-
-
-def memoize(func):
-    cache = {}
-
-    def wrapped(*args, **kwargs):
-        key = (tuple(args), tuple(kwargs.items()))
-        if key not in cache:
-            cache[key] = func(*args, **kwargs)
-        return cache[key]
-
-    return wrapped
diff --git a/src/pip/_vendor/html5lib/constants.py b/src/pip/_vendor/html5lib/constants.py
deleted file mode 100644
index fe3e237cd..000000000
--- a/src/pip/_vendor/html5lib/constants.py
+++ /dev/null
@@ -1,2946 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import string
-
-EOF = None
-
-E = {
-    "null-character":
-        "Null character in input stream, replaced with U+FFFD.",
-    "invalid-codepoint":
-        "Invalid codepoint in stream.",
-    "incorrectly-placed-solidus":
-        "Solidus (/) incorrectly placed in tag.",
-    "incorrect-cr-newline-entity":
-        "Incorrect CR newline entity, replaced with LF.",
-    "illegal-windows-1252-entity":
-        "Entity used with illegal number (windows-1252 reference).",
-    "cant-convert-numeric-entity":
-        "Numeric entity couldn't be converted to character "
-        "(codepoint U+%(charAsInt)08x).",
-    "illegal-codepoint-for-numeric-entity":
-        "Numeric entity represents an illegal codepoint: "
-        "U+%(charAsInt)08x.",
-    "numeric-entity-without-semicolon":
-        "Numeric entity didn't end with ';'.",
-    "expected-numeric-entity-but-got-eof":
-        "Numeric entity expected. Got end of file instead.",
-    "expected-numeric-entity":
-        "Numeric entity expected but none found.",
-    "named-entity-without-semicolon":
-        "Named entity didn't end with ';'.",
-    "expected-named-entity":
-        "Named entity expected. Got none.",
-    "attributes-in-end-tag":
-        "End tag contains unexpected attributes.",
-    'self-closing-flag-on-end-tag':
-        "End tag contains unexpected self-closing flag.",
-    "expected-tag-name-but-got-right-bracket":
-        "Expected tag name. Got '>' instead.",
-    "expected-tag-name-but-got-question-mark":
-        "Expected tag name. Got '?' instead. (HTML doesn't "
-        "support processing instructions.)",
-    "expected-tag-name":
-        "Expected tag name. Got something else instead",
-    "expected-closing-tag-but-got-right-bracket":
-        "Expected closing tag. Got '>' instead. Ignoring '</>'.",
-    "expected-closing-tag-but-got-eof":
-        "Expected closing tag. Unexpected end of file.",
-    "expected-closing-tag-but-got-char":
-        "Expected closing tag. Unexpected character '%(data)s' found.",
-    "eof-in-tag-name":
-        "Unexpected end of file in the tag name.",
-    "expected-attribute-name-but-got-eof":
-        "Unexpected end of file. Expected attribute name instead.",
-    "eof-in-attribute-name":
-        "Unexpected end of file in attribute name.",
-    "invalid-character-in-attribute-name":
-        "Invalid character in attribute name",
-    "duplicate-attribute":
-        "Dropped duplicate attribute on tag.",
-    "expected-end-of-tag-name-but-got-eof":
-        "Unexpected end of file. Expected = or end of tag.",
-    "expected-attribute-value-but-got-eof":
-        "Unexpected end of file. Expected attribute value.",
-    "expected-attribute-value-but-got-right-bracket":
-        "Expected attribute value. Got '>' instead.",
-    'equals-in-unquoted-attribute-value':
-        "Unexpected = in unquoted attribute",
-    'unexpected-character-in-unquoted-attribute-value':
-        "Unexpected character in unquoted attribute",
-    "invalid-character-after-attribute-name":
-        "Unexpected character after attribute name.",
-    "unexpected-character-after-attribute-value":
-        "Unexpected character after attribute value.",
-    "eof-in-attribute-value-double-quote":
-        "Unexpected end of file in attribute value (\").",
-    "eof-in-attribute-value-single-quote":
-        "Unexpected end of file in attribute value (').",
-    "eof-in-attribute-value-no-quotes":
-        "Unexpected end of file in attribute value.",
-    "unexpected-EOF-after-solidus-in-tag":
-        "Unexpected end of file in tag. Expected >",
-    "unexpected-character-after-solidus-in-tag":
-        "Unexpected character after / in tag. Expected >",
-    "expected-dashes-or-doctype":
-        "Expected '--' or 'DOCTYPE'. Not found.",
-    "unexpected-bang-after-double-dash-in-comment":
-        "Unexpected ! after -- in comment",
-    "unexpected-space-after-double-dash-in-comment":
-        "Unexpected space after -- in comment",
-    "incorrect-comment":
-        "Incorrect comment.",
-    "eof-in-comment":
-        "Unexpected end of file in comment.",
-    "eof-in-comment-end-dash":
-        "Unexpected end of file in comment (-)",
-    "unexpected-dash-after-double-dash-in-comment":
-        "Unexpected '-' after '--' found in comment.",
-    "eof-in-comment-double-dash":
-        "Unexpected end of file in comment (--).",
-    "eof-in-comment-end-space-state":
-        "Unexpected end of file in comment.",
-    "eof-in-comment-end-bang-state":
-        "Unexpected end of file in comment.",
-    "unexpected-char-in-comment":
-        "Unexpected character in comment found.",
-    "need-space-after-doctype":
-        "No space after literal string 'DOCTYPE'.",
-    "expected-doctype-name-but-got-right-bracket":
-        "Unexpected > character. Expected DOCTYPE name.",
-    "expected-doctype-name-but-got-eof":
-        "Unexpected end of file. Expected DOCTYPE name.",
-    "eof-in-doctype-name":
-        "Unexpected end of file in DOCTYPE name.",
-    "eof-in-doctype":
-        "Unexpected end of file in DOCTYPE.",
-    "expected-space-or-right-bracket-in-doctype":
-        "Expected space or '>'. Got '%(data)s'",
-    "unexpected-end-of-doctype":
-        "Unexpected end of DOCTYPE.",
-    "unexpected-char-in-doctype":
-        "Unexpected character in DOCTYPE.",
-    "eof-in-innerhtml":
-        "XXX innerHTML EOF",
-    "unexpected-doctype":
-        "Unexpected DOCTYPE. Ignored.",
-    "non-html-root":
-        "html needs to be the first start tag.",
-    "expected-doctype-but-got-eof":
-        "Unexpected End of file. Expected DOCTYPE.",
-    "unknown-doctype":
-        "Erroneous DOCTYPE.",
-    "expected-doctype-but-got-chars":
-        "Unexpected non-space characters. Expected DOCTYPE.",
-    "expected-doctype-but-got-start-tag":
-        "Unexpected start tag (%(name)s). Expected DOCTYPE.",
-    "expected-doctype-but-got-end-tag":
-        "Unexpected end tag (%(name)s). Expected DOCTYPE.",
-    "end-tag-after-implied-root":
-        "Unexpected end tag (%(name)s) after the (implied) root element.",
-    "expected-named-closing-tag-but-got-eof":
-        "Unexpected end of file. Expected end tag (%(name)s).",
-    "two-heads-are-not-better-than-one":
-        "Unexpected start tag head in existing head. Ignored.",
-    "unexpected-end-tag":
-        "Unexpected end tag (%(name)s). Ignored.",
-    "unexpected-start-tag-out-of-my-head":
-        "Unexpected start tag (%(name)s) that can be in head. Moved.",
-    "unexpected-start-tag":
-        "Unexpected start tag (%(name)s).",
-    "missing-end-tag":
-        "Missing end tag (%(name)s).",
-    "missing-end-tags":
-        "Missing end tags (%(name)s).",
-    "unexpected-start-tag-implies-end-tag":
-        "Unexpected start tag (%(startName)s) "
-        "implies end tag (%(endName)s).",
-    "unexpected-start-tag-treated-as":
-        "Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
-    "deprecated-tag":
-        "Unexpected start tag %(name)s. Don't use it!",
-    "unexpected-start-tag-ignored":
-        "Unexpected start tag %(name)s. Ignored.",
-    "expected-one-end-tag-but-got-another":
-        "Unexpected end tag (%(gotName)s). "
-        "Missing end tag (%(expectedName)s).",
-    "end-tag-too-early":
-        "End tag (%(name)s) seen too early. Expected other end tag.",
-    "end-tag-too-early-named":
-        "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
-    "end-tag-too-early-ignored":
-        "End tag (%(name)s) seen too early. Ignored.",
-    "adoption-agency-1.1":
-        "End tag (%(name)s) violates step 1, "
-        "paragraph 1 of the adoption agency algorithm.",
-    "adoption-agency-1.2":
-        "End tag (%(name)s) violates step 1, "
-        "paragraph 2 of the adoption agency algorithm.",
-    "adoption-agency-1.3":
-        "End tag (%(name)s) violates step 1, "
-        "paragraph 3 of the adoption agency algorithm.",
-    "adoption-agency-4.4":
-        "End tag (%(name)s) violates step 4, "
-        "paragraph 4 of the adoption agency algorithm.",
-    "unexpected-end-tag-treated-as":
-        "Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
-    "no-end-tag":
-        "This element (%(name)s) has no end tag.",
-    "unexpected-implied-end-tag-in-table":
-        "Unexpected implied end tag (%(name)s) in the table phase.",
-    "unexpected-implied-end-tag-in-table-body":
-        "Unexpected implied end tag (%(name)s) in the table body phase.",
-    "unexpected-char-implies-table-voodoo":
-        "Unexpected non-space characters in "
-        "table context caused voodoo mode.",
-    "unexpected-hidden-input-in-table":
-        "Unexpected input with type hidden in table context.",
-    "unexpected-form-in-table":
-        "Unexpected form in table context.",
-    "unexpected-start-tag-implies-table-voodoo":
-        "Unexpected start tag (%(name)s) in "
-        "table context caused voodoo mode.",
-    "unexpected-end-tag-implies-table-voodoo":
-        "Unexpected end tag (%(name)s) in "
-        "table context caused voodoo mode.",
-    "unexpected-cell-in-table-body":
-        "Unexpected table cell start tag (%(name)s) "
-        "in the table body phase.",
-    "unexpected-cell-end-tag":
-        "Got table cell end tag (%(name)s) "
-        "while required end tags are missing.",
-    "unexpected-end-tag-in-table-body":
-        "Unexpected end tag (%(name)s) in the table body phase. Ignored.",
-    "unexpected-implied-end-tag-in-table-row":
-        "Unexpected implied end tag (%(name)s) in the table row phase.",
-    "unexpected-end-tag-in-table-row":
-        "Unexpected end tag (%(name)s) in the table row phase. Ignored.",
-    "unexpected-select-in-select":
-        "Unexpected select start tag in the select phase "
-        "treated as select end tag.",
-    "unexpected-input-in-select":
-        "Unexpected input start tag in the select phase.",
-    "unexpected-start-tag-in-select":
-        "Unexpected start tag token (%(name)s in the select phase. "
-        "Ignored.",
-    "unexpected-end-tag-in-select":
-        "Unexpected end tag (%(name)s) in the select phase. Ignored.",
-    "unexpected-table-element-start-tag-in-select-in-table":
-        "Unexpected table element start tag (%(name)s) in the select in table phase.",
-    "unexpected-table-element-end-tag-in-select-in-table":
-        "Unexpected table element end tag (%(name)s) in the select in table phase.",
-    "unexpected-char-after-body":
-        "Unexpected non-space characters in the after body phase.",
-    "unexpected-start-tag-after-body":
-        "Unexpected start tag token (%(name)s)"
-        " in the after body phase.",
-    "unexpected-end-tag-after-body":
-        "Unexpected end tag token (%(name)s)"
-        " in the after body phase.",
-    "unexpected-char-in-frameset":
-        "Unexpected characters in the frameset phase. Characters ignored.",
-    "unexpected-start-tag-in-frameset":
-        "Unexpected start tag token (%(name)s)"
-        " in the frameset phase. Ignored.",
-    "unexpected-frameset-in-frameset-innerhtml":
-        "Unexpected end tag token (frameset) "
-        "in the frameset phase (innerHTML).",
-    "unexpected-end-tag-in-frameset":
-        "Unexpected end tag token (%(name)s)"
-        " in the frameset phase. Ignored.",
-    "unexpected-char-after-frameset":
-        "Unexpected non-space characters in the "
-        "after frameset phase. Ignored.",
-    "unexpected-start-tag-after-frameset":
-        "Unexpected start tag (%(name)s)"
-        " in the after frameset phase. Ignored.",
-    "unexpected-end-tag-after-frameset":
-        "Unexpected end tag (%(name)s)"
-        " in the after frameset phase. Ignored.",
-    "unexpected-end-tag-after-body-innerhtml":
-        "Unexpected end tag after body(innerHtml)",
-    "expected-eof-but-got-char":
-        "Unexpected non-space characters. Expected end of file.",
-    "expected-eof-but-got-start-tag":
-        "Unexpected start tag (%(name)s)"
-        ". Expected end of file.",
-    "expected-eof-but-got-end-tag":
-        "Unexpected end tag (%(name)s)"
-        ". Expected end of file.",
-    "eof-in-table":
-        "Unexpected end of file. Expected table content.",
-    "eof-in-select":
-        "Unexpected end of file. Expected select content.",
-    "eof-in-frameset":
-        "Unexpected end of file. Expected frameset content.",
-    "eof-in-script-in-script":
-        "Unexpected end of file. Expected script content.",
-    "eof-in-foreign-lands":
-        "Unexpected end of file. Expected foreign content",
-    "non-void-element-with-trailing-solidus":
-        "Trailing solidus not allowed on element %(name)s",
-    "unexpected-html-element-in-foreign-content":
-        "Element %(name)s not allowed in a non-html context",
-    "unexpected-end-tag-before-html":
-        "Unexpected end tag (%(name)s) before html.",
-    "unexpected-inhead-noscript-tag":
-        "Element %(name)s not allowed in a inhead-noscript context",
-    "eof-in-head-noscript":
-        "Unexpected end of file. Expected inhead-noscript content",
-    "char-in-head-noscript":
-        "Unexpected non-space character. Expected inhead-noscript content",
-    "XXX-undefined-error":
-        "Undefined error (this sucks and should be fixed)",
-}
-
-namespaces = {
-    "html": "http://www.w3.org/1999/xhtml",
-    "mathml": "http://www.w3.org/1998/Math/MathML",
-    "svg": "http://www.w3.org/2000/svg",
-    "xlink": "http://www.w3.org/1999/xlink",
-    "xml": "http://www.w3.org/XML/1998/namespace",
-    "xmlns": "http://www.w3.org/2000/xmlns/"
-}
-
-scopingElements = frozenset([
-    (namespaces["html"], "applet"),
-    (namespaces["html"], "caption"),
-    (namespaces["html"], "html"),
-    (namespaces["html"], "marquee"),
-    (namespaces["html"], "object"),
-    (namespaces["html"], "table"),
-    (namespaces["html"], "td"),
-    (namespaces["html"], "th"),
-    (namespaces["mathml"], "mi"),
-    (namespaces["mathml"], "mo"),
-    (namespaces["mathml"], "mn"),
-    (namespaces["mathml"], "ms"),
-    (namespaces["mathml"], "mtext"),
-    (namespaces["mathml"], "annotation-xml"),
-    (namespaces["svg"], "foreignObject"),
-    (namespaces["svg"], "desc"),
-    (namespaces["svg"], "title"),
-])
-
-formattingElements = frozenset([
-    (namespaces["html"], "a"),
-    (namespaces["html"], "b"),
-    (namespaces["html"], "big"),
-    (namespaces["html"], "code"),
-    (namespaces["html"], "em"),
-    (namespaces["html"], "font"),
-    (namespaces["html"], "i"),
-    (namespaces["html"], "nobr"),
-    (namespaces["html"], "s"),
-    (namespaces["html"], "small"),
-    (namespaces["html"], "strike"),
-    (namespaces["html"], "strong"),
-    (namespaces["html"], "tt"),
-    (namespaces["html"], "u")
-])
-
-specialElements = frozenset([
-    (namespaces["html"], "address"),
-    (namespaces["html"], "applet"),
-    (namespaces["html"], "area"),
-    (namespaces["html"], "article"),
-    (namespaces["html"], "aside"),
-    (namespaces["html"], "base"),
-    (namespaces["html"], "basefont"),
-    (namespaces["html"], "bgsound"),
-    (namespaces["html"], "blockquote"),
-    (namespaces["html"], "body"),
-    (namespaces["html"], "br"),
-    (namespaces["html"], "button"),
-    (namespaces["html"], "caption"),
-    (namespaces["html"], "center"),
-    (namespaces["html"], "col"),
-    (namespaces["html"], "colgroup"),
-    (namespaces["html"], "command"),
-    (namespaces["html"], "dd"),
-    (namespaces["html"], "details"),
-    (namespaces["html"], "dir"),
-    (namespaces["html"], "div"),
-    (namespaces["html"], "dl"),
-    (namespaces["html"], "dt"),
-    (namespaces["html"], "embed"),
-    (namespaces["html"], "fieldset"),
-    (namespaces["html"], "figure"),
-    (namespaces["html"], "footer"),
-    (namespaces["html"], "form"),
-    (namespaces["html"], "frame"),
-    (namespaces["html"], "frameset"),
-    (namespaces["html"], "h1"),
-    (namespaces["html"], "h2"),
-    (namespaces["html"], "h3"),
-    (namespaces["html"], "h4"),
-    (namespaces["html"], "h5"),
-    (namespaces["html"], "h6"),
-    (namespaces["html"], "head"),
-    (namespaces["html"], "header"),
-    (namespaces["html"], "hr"),
-    (namespaces["html"], "html"),
-    (namespaces["html"], "iframe"),
-    # Note that image is commented out in the spec as "this isn't an
-    # element that can end up on the stack, so it doesn't matter,"
-    (namespaces["html"], "image"),
-    (namespaces["html"], "img"),
-    (namespaces["html"], "input"),
-    (namespaces["html"], "isindex"),
-    (namespaces["html"], "li"),
-    (namespaces["html"], "link"),
-    (namespaces["html"], "listing"),
-    (namespaces["html"], "marquee"),
-    (namespaces["html"], "menu"),
-    (namespaces["html"], "meta"),
-    (namespaces["html"], "nav"),
-    (namespaces["html"], "noembed"),
-    (namespaces["html"], "noframes"),
-    (namespaces["html"], "noscript"),
-    (namespaces["html"], "object"),
-    (namespaces["html"], "ol"),
-    (namespaces["html"], "p"),
-    (namespaces["html"], "param"),
-    (namespaces["html"], "plaintext"),
-    (namespaces["html"], "pre"),
-    (namespaces["html"], "script"),
-    (namespaces["html"], "section"),
-    (namespaces["html"], "select"),
-    (namespaces["html"], "style"),
-    (namespaces["html"], "table"),
-    (namespaces["html"], "tbody"),
-    (namespaces["html"], "td"),
-    (namespaces["html"], "textarea"),
-    (namespaces["html"], "tfoot"),
-    (namespaces["html"], "th"),
-    (namespaces["html"], "thead"),
-    (namespaces["html"], "title"),
-    (namespaces["html"], "tr"),
-    (namespaces["html"], "ul"),
-    (namespaces["html"], "wbr"),
-    (namespaces["html"], "xmp"),
-    (namespaces["svg"], "foreignObject")
-])
-
-htmlIntegrationPointElements = frozenset([
-    (namespaces["mathml"], "annotation-xml"),
-    (namespaces["svg"], "foreignObject"),
-    (namespaces["svg"], "desc"),
-    (namespaces["svg"], "title")
-])
-
-mathmlTextIntegrationPointElements = frozenset([
-    (namespaces["mathml"], "mi"),
-    (namespaces["mathml"], "mo"),
-    (namespaces["mathml"], "mn"),
-    (namespaces["mathml"], "ms"),
-    (namespaces["mathml"], "mtext")
-])
-
-adjustSVGAttributes = {
-    "attributename": "attributeName",
-    "attributetype": "attributeType",
-    "basefrequency": "baseFrequency",
-    "baseprofile": "baseProfile",
-    "calcmode": "calcMode",
-    "clippathunits": "clipPathUnits",
-    "contentscripttype": "contentScriptType",
-    "contentstyletype": "contentStyleType",
-    "diffuseconstant": "diffuseConstant",
-    "edgemode": "edgeMode",
-    "externalresourcesrequired": "externalResourcesRequired",
-    "filterres": "filterRes",
-    "filterunits": "filterUnits",
-    "glyphref": "glyphRef",
-    "gradienttransform": "gradientTransform",
-    "gradientunits": "gradientUnits",
-    "kernelmatrix": "kernelMatrix",
-    "kernelunitlength": "kernelUnitLength",
-    "keypoints": "keyPoints",
-    "keysplines": "keySplines",
-    "keytimes": "keyTimes",
-    "lengthadjust": "lengthAdjust",
-    "limitingconeangle": "limitingConeAngle",
-    "markerheight": "markerHeight",
-    "markerunits": "markerUnits",
-    "markerwidth": "markerWidth",
-    "maskcontentunits": "maskContentUnits",
-    "maskunits": "maskUnits",
-    "numoctaves": "numOctaves",
-    "pathlength": "pathLength",
-    "patterncontentunits": "patternContentUnits",
-    "patterntransform": "patternTransform",
-    "patternunits": "patternUnits",
-    "pointsatx": "pointsAtX",
-    "pointsaty": "pointsAtY",
-    "pointsatz": "pointsAtZ",
-    "preservealpha": "preserveAlpha",
-    "preserveaspectratio": "preserveAspectRatio",
-    "primitiveunits": "primitiveUnits",
-    "refx": "refX",
-    "refy": "refY",
-    "repeatcount": "repeatCount",
-    "repeatdur": "repeatDur",
-    "requiredextensions": "requiredExtensions",
-    "requiredfeatures": "requiredFeatures",
-    "specularconstant": "specularConstant",
-    "specularexponent": "specularExponent",
-    "spreadmethod": "spreadMethod",
-    "startoffset": "startOffset",
-    "stddeviation": "stdDeviation",
-    "stitchtiles": "stitchTiles",
-    "surfacescale": "surfaceScale",
-    "systemlanguage": "systemLanguage",
-    "tablevalues": "tableValues",
-    "targetx": "targetX",
-    "targety": "targetY",
-    "textlength": "textLength",
-    "viewbox": "viewBox",
-    "viewtarget": "viewTarget",
-    "xchannelselector": "xChannelSelector",
-    "ychannelselector": "yChannelSelector",
-    "zoomandpan": "zoomAndPan"
-}
-
-adjustMathMLAttributes = {"definitionurl": "definitionURL"}
-
-adjustForeignAttributes = {
-    "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
-    "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
-    "xlink:href": ("xlink", "href", namespaces["xlink"]),
-    "xlink:role": ("xlink", "role", namespaces["xlink"]),
-    "xlink:show": ("xlink", "show", namespaces["xlink"]),
-    "xlink:title": ("xlink", "title", namespaces["xlink"]),
-    "xlink:type": ("xlink", "type", namespaces["xlink"]),
-    "xml:base": ("xml", "base", namespaces["xml"]),
-    "xml:lang": ("xml", "lang", namespaces["xml"]),
-    "xml:space": ("xml", "space", namespaces["xml"]),
-    "xmlns": (None, "xmlns", namespaces["xmlns"]),
-    "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
-}
-
-unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
-                             adjustForeignAttributes.items()}
-
-spaceCharacters = frozenset([
-    "\t",
-    "\n",
-    "\u000C",
-    " ",
-    "\r"
-])
-
-tableInsertModeElements = frozenset([
-    "table",
-    "tbody",
-    "tfoot",
-    "thead",
-    "tr"
-])
-
-asciiLowercase = frozenset(string.ascii_lowercase)
-asciiUppercase = frozenset(string.ascii_uppercase)
-asciiLetters = frozenset(string.ascii_letters)
-digits = frozenset(string.digits)
-hexDigits = frozenset(string.hexdigits)
-
-asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
-
-# Heading elements need to be ordered
-headingElements = (
-    "h1",
-    "h2",
-    "h3",
-    "h4",
-    "h5",
-    "h6"
-)
-
-voidElements = frozenset([
-    "base",
-    "command",
-    "event-source",
-    "link",
-    "meta",
-    "hr",
-    "br",
-    "img",
-    "embed",
-    "param",
-    "area",
-    "col",
-    "input",
-    "source",
-    "track"
-])
-
-cdataElements = frozenset(['title', 'textarea'])
-
-rcdataElements = frozenset([
-    'style',
-    'script',
-    'xmp',
-    'iframe',
-    'noembed',
-    'noframes',
-    'noscript'
-])
-
-booleanAttributes = {
-    "": frozenset(["irrelevant", "itemscope"]),
-    "style": frozenset(["scoped"]),
-    "img": frozenset(["ismap"]),
-    "audio": frozenset(["autoplay", "controls"]),
-    "video": frozenset(["autoplay", "controls"]),
-    "script": frozenset(["defer", "async"]),
-    "details": frozenset(["open"]),
-    "datagrid": frozenset(["multiple", "disabled"]),
-    "command": frozenset(["hidden", "disabled", "checked", "default"]),
-    "hr": frozenset(["noshade"]),
-    "menu": frozenset(["autosubmit"]),
-    "fieldset": frozenset(["disabled", "readonly"]),
-    "option": frozenset(["disabled", "readonly", "selected"]),
-    "optgroup": frozenset(["disabled", "readonly"]),
-    "button": frozenset(["disabled", "autofocus"]),
-    "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
-    "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
-    "output": frozenset(["disabled", "readonly"]),
-    "iframe": frozenset(["seamless"]),
-}
-
-# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
-# therefore can't be a frozenset.
-entitiesWindows1252 = (
-    8364,   # 0x80  0x20AC  EURO SIGN
-    65533,  # 0x81          UNDEFINED
-    8218,   # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
-    402,    # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
-    8222,   # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
-    8230,   # 0x85  0x2026  HORIZONTAL ELLIPSIS
-    8224,   # 0x86  0x2020  DAGGER
-    8225,   # 0x87  0x2021  DOUBLE DAGGER
-    710,    # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
-    8240,   # 0x89  0x2030  PER MILLE SIGN
-    352,    # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
-    8249,   # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    338,    # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
-    65533,  # 0x8D          UNDEFINED
-    381,    # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
-    65533,  # 0x8F          UNDEFINED
-    65533,  # 0x90          UNDEFINED
-    8216,   # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
-    8217,   # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
-    8220,   # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
-    8221,   # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
-    8226,   # 0x95  0x2022  BULLET
-    8211,   # 0x96  0x2013  EN DASH
-    8212,   # 0x97  0x2014  EM DASH
-    732,    # 0x98  0x02DC  SMALL TILDE
-    8482,   # 0x99  0x2122  TRADE MARK SIGN
-    353,    # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
-    8250,   # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    339,    # 0x9C  0x0153  LATIN SMALL LIGATURE OE
-    65533,  # 0x9D          UNDEFINED
-    382,    # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
-    376     # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
-)
-
-xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
-
-entities = {
-    "AElig": "\xc6",
-    "AElig;": "\xc6",
-    "AMP": "&",
-    "AMP;": "&",
-    "Aacute": "\xc1",
-    "Aacute;": "\xc1",
-    "Abreve;": "\u0102",
-    "Acirc": "\xc2",
-    "Acirc;": "\xc2",
-    "Acy;": "\u0410",
-    "Afr;": "\U0001d504",
-    "Agrave": "\xc0",
-    "Agrave;": "\xc0",
-    "Alpha;": "\u0391",
-    "Amacr;": "\u0100",
-    "And;": "\u2a53",
-    "Aogon;": "\u0104",
-    "Aopf;": "\U0001d538",
-    "ApplyFunction;": "\u2061",
-    "Aring": "\xc5",
-    "Aring;": "\xc5",
-    "Ascr;": "\U0001d49c",
-    "Assign;": "\u2254",
-    "Atilde": "\xc3",
-    "Atilde;": "\xc3",
-    "Auml": "\xc4",
-    "Auml;": "\xc4",
-    "Backslash;": "\u2216",
-    "Barv;": "\u2ae7",
-    "Barwed;": "\u2306",
-    "Bcy;": "\u0411",
-    "Because;": "\u2235",
-    "Bernoullis;": "\u212c",
-    "Beta;": "\u0392",
-    "Bfr;": "\U0001d505",
-    "Bopf;": "\U0001d539",
-    "Breve;": "\u02d8",
-    "Bscr;": "\u212c",
-    "Bumpeq;": "\u224e",
-    "CHcy;": "\u0427",
-    "COPY": "\xa9",
-    "COPY;": "\xa9",
-    "Cacute;": "\u0106",
-    "Cap;": "\u22d2",
-    "CapitalDifferentialD;": "\u2145",
-    "Cayleys;": "\u212d",
-    "Ccaron;": "\u010c",
-    "Ccedil": "\xc7",
-    "Ccedil;": "\xc7",
-    "Ccirc;": "\u0108",
-    "Cconint;": "\u2230",
-    "Cdot;": "\u010a",
-    "Cedilla;": "\xb8",
-    "CenterDot;": "\xb7",
-    "Cfr;": "\u212d",
-    "Chi;": "\u03a7",
-    "CircleDot;": "\u2299",
-    "CircleMinus;": "\u2296",
-    "CirclePlus;": "\u2295",
-    "CircleTimes;": "\u2297",
-    "ClockwiseContourIntegral;": "\u2232",
-    "CloseCurlyDoubleQuote;": "\u201d",
-    "CloseCurlyQuote;": "\u2019",
-    "Colon;": "\u2237",
-    "Colone;": "\u2a74",
-    "Congruent;": "\u2261",
-    "Conint;": "\u222f",
-    "ContourIntegral;": "\u222e",
-    "Copf;": "\u2102",
-    "Coproduct;": "\u2210",
-    "CounterClockwiseContourIntegral;": "\u2233",
-    "Cross;": "\u2a2f",
-    "Cscr;": "\U0001d49e",
-    "Cup;": "\u22d3",
-    "CupCap;": "\u224d",
-    "DD;": "\u2145",
-    "DDotrahd;": "\u2911",
-    "DJcy;": "\u0402",
-    "DScy;": "\u0405",
-    "DZcy;": "\u040f",
-    "Dagger;": "\u2021",
-    "Darr;": "\u21a1",
-    "Dashv;": "\u2ae4",
-    "Dcaron;": "\u010e",
-    "Dcy;": "\u0414",
-    "Del;": "\u2207",
-    "Delta;": "\u0394",
-    "Dfr;": "\U0001d507",
-    "DiacriticalAcute;": "\xb4",
-    "DiacriticalDot;": "\u02d9",
-    "DiacriticalDoubleAcute;": "\u02dd",
-    "DiacriticalGrave;": "`",
-    "DiacriticalTilde;": "\u02dc",
-    "Diamond;": "\u22c4",
-    "DifferentialD;": "\u2146",
-    "Dopf;": "\U0001d53b",
-    "Dot;": "\xa8",
-    "DotDot;": "\u20dc",
-    "DotEqual;": "\u2250",
-    "DoubleContourIntegral;": "\u222f",
-    "DoubleDot;": "\xa8",
-    "DoubleDownArrow;": "\u21d3",
-    "DoubleLeftArrow;": "\u21d0",
-    "DoubleLeftRightArrow;": "\u21d4",
-    "DoubleLeftTee;": "\u2ae4",
-    "DoubleLongLeftArrow;": "\u27f8",
-    "DoubleLongLeftRightArrow;": "\u27fa",
-    "DoubleLongRightArrow;": "\u27f9",
-    "DoubleRightArrow;": "\u21d2",
-    "DoubleRightTee;": "\u22a8",
-    "DoubleUpArrow;": "\u21d1",
-    "DoubleUpDownArrow;": "\u21d5",
-    "DoubleVerticalBar;": "\u2225",
-    "DownArrow;": "\u2193",
-    "DownArrowBar;": "\u2913",
-    "DownArrowUpArrow;": "\u21f5",
-    "DownBreve;": "\u0311",
-    "DownLeftRightVector;": "\u2950",
-    "DownLeftTeeVector;": "\u295e",
-    "DownLeftVector;": "\u21bd",
-    "DownLeftVectorBar;": "\u2956",
-    "DownRightTeeVector;": "\u295f",
-    "DownRightVector;": "\u21c1",
-    "DownRightVectorBar;": "\u2957",
-    "DownTee;": "\u22a4",
-    "DownTeeArrow;": "\u21a7",
-    "Downarrow;": "\u21d3",
-    "Dscr;": "\U0001d49f",
-    "Dstrok;": "\u0110",
-    "ENG;": "\u014a",
-    "ETH": "\xd0",
-    "ETH;": "\xd0",
-    "Eacute": "\xc9",
-    "Eacute;": "\xc9",
-    "Ecaron;": "\u011a",
-    "Ecirc": "\xca",
-    "Ecirc;": "\xca",
-    "Ecy;": "\u042d",
-    "Edot;": "\u0116",
-    "Efr;": "\U0001d508",
-    "Egrave": "\xc8",
-    "Egrave;": "\xc8",
-    "Element;": "\u2208",
-    "Emacr;": "\u0112",
-    "EmptySmallSquare;": "\u25fb",
-    "EmptyVerySmallSquare;": "\u25ab",
-    "Eogon;": "\u0118",
-    "Eopf;": "\U0001d53c",
-    "Epsilon;": "\u0395",
-    "Equal;": "\u2a75",
-    "EqualTilde;": "\u2242",
-    "Equilibrium;": "\u21cc",
-    "Escr;": "\u2130",
-    "Esim;": "\u2a73",
-    "Eta;": "\u0397",
-    "Euml": "\xcb",
-    "Euml;": "\xcb",
-    "Exists;": "\u2203",
-    "ExponentialE;": "\u2147",
-    "Fcy;": "\u0424",
-    "Ffr;": "\U0001d509",
-    "FilledSmallSquare;": "\u25fc",
-    "FilledVerySmallSquare;": "\u25aa",
-    "Fopf;": "\U0001d53d",
-    "ForAll;": "\u2200",
-    "Fouriertrf;": "\u2131",
-    "Fscr;": "\u2131",
-    "GJcy;": "\u0403",
-    "GT": ">",
-    "GT;": ">",
-    "Gamma;": "\u0393",
-    "Gammad;": "\u03dc",
-    "Gbreve;": "\u011e",
-    "Gcedil;": "\u0122",
-    "Gcirc;": "\u011c",
-    "Gcy;": "\u0413",
-    "Gdot;": "\u0120",
-    "Gfr;": "\U0001d50a",
-    "Gg;": "\u22d9",
-    "Gopf;": "\U0001d53e",
-    "GreaterEqual;": "\u2265",
-    "GreaterEqualLess;": "\u22db",
-    "GreaterFullEqual;": "\u2267",
-    "GreaterGreater;": "\u2aa2",
-    "GreaterLess;": "\u2277",
-    "GreaterSlantEqual;": "\u2a7e",
-    "GreaterTilde;": "\u2273",
-    "Gscr;": "\U0001d4a2",
-    "Gt;": "\u226b",
-    "HARDcy;": "\u042a",
-    "Hacek;": "\u02c7",
-    "Hat;": "^",
-    "Hcirc;": "\u0124",
-    "Hfr;": "\u210c",
-    "HilbertSpace;": "\u210b",
-    "Hopf;": "\u210d",
-    "HorizontalLine;": "\u2500",
-    "Hscr;": "\u210b",
-    "Hstrok;": "\u0126",
-    "HumpDownHump;": "\u224e",
-    "HumpEqual;": "\u224f",
-    "IEcy;": "\u0415",
-    "IJlig;": "\u0132",
-    "IOcy;": "\u0401",
-    "Iacute": "\xcd",
-    "Iacute;": "\xcd",
-    "Icirc": "\xce",
-    "Icirc;": "\xce",
-    "Icy;": "\u0418",
-    "Idot;": "\u0130",
-    "Ifr;": "\u2111",
-    "Igrave": "\xcc",
-    "Igrave;": "\xcc",
-    "Im;": "\u2111",
-    "Imacr;": "\u012a",
-    "ImaginaryI;": "\u2148",
-    "Implies;": "\u21d2",
-    "Int;": "\u222c",
-    "Integral;": "\u222b",
-    "Intersection;": "\u22c2",
-    "InvisibleComma;": "\u2063",
-    "InvisibleTimes;": "\u2062",
-    "Iogon;": "\u012e",
-    "Iopf;": "\U0001d540",
-    "Iota;": "\u0399",
-    "Iscr;": "\u2110",
-    "Itilde;": "\u0128",
-    "Iukcy;": "\u0406",
-    "Iuml": "\xcf",
-    "Iuml;": "\xcf",
-    "Jcirc;": "\u0134",
-    "Jcy;": "\u0419",
-    "Jfr;": "\U0001d50d",
-    "Jopf;": "\U0001d541",
-    "Jscr;": "\U0001d4a5",
-    "Jsercy;": "\u0408",
-    "Jukcy;": "\u0404",
-    "KHcy;": "\u0425",
-    "KJcy;": "\u040c",
-    "Kappa;": "\u039a",
-    "Kcedil;": "\u0136",
-    "Kcy;": "\u041a",
-    "Kfr;": "\U0001d50e",
-    "Kopf;": "\U0001d542",
-    "Kscr;": "\U0001d4a6",
-    "LJcy;": "\u0409",
-    "LT": "<",
-    "LT;": "<",
-    "Lacute;": "\u0139",
-    "Lambda;": "\u039b",
-    "Lang;": "\u27ea",
-    "Laplacetrf;": "\u2112",
-    "Larr;": "\u219e",
-    "Lcaron;": "\u013d",
-    "Lcedil;": "\u013b",
-    "Lcy;": "\u041b",
-    "LeftAngleBracket;": "\u27e8",
-    "LeftArrow;": "\u2190",
-    "LeftArrowBar;": "\u21e4",
-    "LeftArrowRightArrow;": "\u21c6",
-    "LeftCeiling;": "\u2308",
-    "LeftDoubleBracket;": "\u27e6",
-    "LeftDownTeeVector;": "\u2961",
-    "LeftDownVector;": "\u21c3",
-    "LeftDownVectorBar;": "\u2959",
-    "LeftFloor;": "\u230a",
-    "LeftRightArrow;": "\u2194",
-    "LeftRightVector;": "\u294e",
-    "LeftTee;": "\u22a3",
-    "LeftTeeArrow;": "\u21a4",
-    "LeftTeeVector;": "\u295a",
-    "LeftTriangle;": "\u22b2",
-    "LeftTriangleBar;": "\u29cf",
-    "LeftTriangleEqual;": "\u22b4",
-    "LeftUpDownVector;": "\u2951",
-    "LeftUpTeeVector;": "\u2960",
-    "LeftUpVector;": "\u21bf",
-    "LeftUpVectorBar;": "\u2958",
-    "LeftVector;": "\u21bc",
-    "LeftVectorBar;": "\u2952",
-    "Leftarrow;": "\u21d0",
-    "Leftrightarrow;": "\u21d4",
-    "LessEqualGreater;": "\u22da",
-    "LessFullEqual;": "\u2266",
-    "LessGreater;": "\u2276",
-    "LessLess;": "\u2aa1",
-    "LessSlantEqual;": "\u2a7d",
-    "LessTilde;": "\u2272",
-    "Lfr;": "\U0001d50f",
-    "Ll;": "\u22d8",
-    "Lleftarrow;": "\u21da",
-    "Lmidot;": "\u013f",
-    "LongLeftArrow;": "\u27f5",
-    "LongLeftRightArrow;": "\u27f7",
-    "LongRightArrow;": "\u27f6",
-    "Longleftarrow;": "\u27f8",
-    "Longleftrightarrow;": "\u27fa",
-    "Longrightarrow;": "\u27f9",
-    "Lopf;": "\U0001d543",
-    "LowerLeftArrow;": "\u2199",
-    "LowerRightArrow;": "\u2198",
-    "Lscr;": "\u2112",
-    "Lsh;": "\u21b0",
-    "Lstrok;": "\u0141",
-    "Lt;": "\u226a",
-    "Map;": "\u2905",
-    "Mcy;": "\u041c",
-    "MediumSpace;": "\u205f",
-    "Mellintrf;": "\u2133",
-    "Mfr;": "\U0001d510",
-    "MinusPlus;": "\u2213",
-    "Mopf;": "\U0001d544",
-    "Mscr;": "\u2133",
-    "Mu;": "\u039c",
-    "NJcy;": "\u040a",
-    "Nacute;": "\u0143",
-    "Ncaron;": "\u0147",
-    "Ncedil;": "\u0145",
-    "Ncy;": "\u041d",
-    "NegativeMediumSpace;": "\u200b",
-    "NegativeThickSpace;": "\u200b",
-    "NegativeThinSpace;": "\u200b",
-    "NegativeVeryThinSpace;": "\u200b",
-    "NestedGreaterGreater;": "\u226b",
-    "NestedLessLess;": "\u226a",
-    "NewLine;": "\n",
-    "Nfr;": "\U0001d511",
-    "NoBreak;": "\u2060",
-    "NonBreakingSpace;": "\xa0",
-    "Nopf;": "\u2115",
-    "Not;": "\u2aec",
-    "NotCongruent;": "\u2262",
-    "NotCupCap;": "\u226d",
-    "NotDoubleVerticalBar;": "\u2226",
-    "NotElement;": "\u2209",
-    "NotEqual;": "\u2260",
-    "NotEqualTilde;": "\u2242\u0338",
-    "NotExists;": "\u2204",
-    "NotGreater;": "\u226f",
-    "NotGreaterEqual;": "\u2271",
-    "NotGreaterFullEqual;": "\u2267\u0338",
-    "NotGreaterGreater;": "\u226b\u0338",
-    "NotGreaterLess;": "\u2279",
-    "NotGreaterSlantEqual;": "\u2a7e\u0338",
-    "NotGreaterTilde;": "\u2275",
-    "NotHumpDownHump;": "\u224e\u0338",
-    "NotHumpEqual;": "\u224f\u0338",
-    "NotLeftTriangle;": "\u22ea",
-    "NotLeftTriangleBar;": "\u29cf\u0338",
-    "NotLeftTriangleEqual;": "\u22ec",
-    "NotLess;": "\u226e",
-    "NotLessEqual;": "\u2270",
-    "NotLessGreater;": "\u2278",
-    "NotLessLess;": "\u226a\u0338",
-    "NotLessSlantEqual;": "\u2a7d\u0338",
-    "NotLessTilde;": "\u2274",
-    "NotNestedGreaterGreater;": "\u2aa2\u0338",
-    "NotNestedLessLess;": "\u2aa1\u0338",
-    "NotPrecedes;": "\u2280",
-    "NotPrecedesEqual;": "\u2aaf\u0338",
-    "NotPrecedesSlantEqual;": "\u22e0",
-    "NotReverseElement;": "\u220c",
-    "NotRightTriangle;": "\u22eb",
-    "NotRightTriangleBar;": "\u29d0\u0338",
-    "NotRightTriangleEqual;": "\u22ed",
-    "NotSquareSubset;": "\u228f\u0338",
-    "NotSquareSubsetEqual;": "\u22e2",
-    "NotSquareSuperset;": "\u2290\u0338",
-    "NotSquareSupersetEqual;": "\u22e3",
-    "NotSubset;": "\u2282\u20d2",
-    "NotSubsetEqual;": "\u2288",
-    "NotSucceeds;": "\u2281",
-    "NotSucceedsEqual;": "\u2ab0\u0338",
-    "NotSucceedsSlantEqual;": "\u22e1",
-    "NotSucceedsTilde;": "\u227f\u0338",
-    "NotSuperset;": "\u2283\u20d2",
-    "NotSupersetEqual;": "\u2289",
-    "NotTilde;": "\u2241",
-    "NotTildeEqual;": "\u2244",
-    "NotTildeFullEqual;": "\u2247",
-    "NotTildeTilde;": "\u2249",
-    "NotVerticalBar;": "\u2224",
-    "Nscr;": "\U0001d4a9",
-    "Ntilde": "\xd1",
-    "Ntilde;": "\xd1",
-    "Nu;": "\u039d",
-    "OElig;": "\u0152",
-    "Oacute": "\xd3",
-    "Oacute;": "\xd3",
-    "Ocirc": "\xd4",
-    "Ocirc;": "\xd4",
-    "Ocy;": "\u041e",
-    "Odblac;": "\u0150",
-    "Ofr;": "\U0001d512",
-    "Ograve": "\xd2",
-    "Ograve;": "\xd2",
-    "Omacr;": "\u014c",
-    "Omega;": "\u03a9",
-    "Omicron;": "\u039f",
-    "Oopf;": "\U0001d546",
-    "OpenCurlyDoubleQuote;": "\u201c",
-    "OpenCurlyQuote;": "\u2018",
-    "Or;": "\u2a54",
-    "Oscr;": "\U0001d4aa",
-    "Oslash": "\xd8",
-    "Oslash;": "\xd8",
-    "Otilde": "\xd5",
-    "Otilde;": "\xd5",
-    "Otimes;": "\u2a37",
-    "Ouml": "\xd6",
-    "Ouml;": "\xd6",
-    "OverBar;": "\u203e",
-    "OverBrace;": "\u23de",
-    "OverBracket;": "\u23b4",
-    "OverParenthesis;": "\u23dc",
-    "PartialD;": "\u2202",
-    "Pcy;": "\u041f",
-    "Pfr;": "\U0001d513",
-    "Phi;": "\u03a6",
-    "Pi;": "\u03a0",
-    "PlusMinus;": "\xb1",
-    "Poincareplane;": "\u210c",
-    "Popf;": "\u2119",
-    "Pr;": "\u2abb",
-    "Precedes;": "\u227a",
-    "PrecedesEqual;": "\u2aaf",
-    "PrecedesSlantEqual;": "\u227c",
-    "PrecedesTilde;": "\u227e",
-    "Prime;": "\u2033",
-    "Product;": "\u220f",
-    "Proportion;": "\u2237",
-    "Proportional;": "\u221d",
-    "Pscr;": "\U0001d4ab",
-    "Psi;": "\u03a8",
-    "QUOT": "\"",
-    "QUOT;": "\"",
-    "Qfr;": "\U0001d514",
-    "Qopf;": "\u211a",
-    "Qscr;": "\U0001d4ac",
-    "RBarr;": "\u2910",
-    "REG": "\xae",
-    "REG;": "\xae",
-    "Racute;": "\u0154",
-    "Rang;": "\u27eb",
-    "Rarr;": "\u21a0",
-    "Rarrtl;": "\u2916",
-    "Rcaron;": "\u0158",
-    "Rcedil;": "\u0156",
-    "Rcy;": "\u0420",
-    "Re;": "\u211c",
-    "ReverseElement;": "\u220b",
-    "ReverseEquilibrium;": "\u21cb",
-    "ReverseUpEquilibrium;": "\u296f",
-    "Rfr;": "\u211c",
-    "Rho;": "\u03a1",
-    "RightAngleBracket;": "\u27e9",
-    "RightArrow;": "\u2192",
-    "RightArrowBar;": "\u21e5",
-    "RightArrowLeftArrow;": "\u21c4",
-    "RightCeiling;": "\u2309",
-    "RightDoubleBracket;": "\u27e7",
-    "RightDownTeeVector;": "\u295d",
-    "RightDownVector;": "\u21c2",
-    "RightDownVectorBar;": "\u2955",
-    "RightFloor;": "\u230b",
-    "RightTee;": "\u22a2",
-    "RightTeeArrow;": "\u21a6",
-    "RightTeeVector;": "\u295b",
-    "RightTriangle;": "\u22b3",
-    "RightTriangleBar;": "\u29d0",
-    "RightTriangleEqual;": "\u22b5",
-    "RightUpDownVector;": "\u294f",
-    "RightUpTeeVector;": "\u295c",
-    "RightUpVector;": "\u21be",
-    "RightUpVectorBar;": "\u2954",
-    "RightVector;": "\u21c0",
-    "RightVectorBar;": "\u2953",
-    "Rightarrow;": "\u21d2",
-    "Ropf;": "\u211d",
-    "RoundImplies;": "\u2970",
-    "Rrightarrow;": "\u21db",
-    "Rscr;": "\u211b",
-    "Rsh;": "\u21b1",
-    "RuleDelayed;": "\u29f4",
-    "SHCHcy;": "\u0429",
-    "SHcy;": "\u0428",
-    "SOFTcy;": "\u042c",
-    "Sacute;": "\u015a",
-    "Sc;": "\u2abc",
-    "Scaron;": "\u0160",
-    "Scedil;": "\u015e",
-    "Scirc;": "\u015c",
-    "Scy;": "\u0421",
-    "Sfr;": "\U0001d516",
-    "ShortDownArrow;": "\u2193",
-    "ShortLeftArrow;": "\u2190",
-    "ShortRightArrow;": "\u2192",
-    "ShortUpArrow;": "\u2191",
-    "Sigma;": "\u03a3",
-    "SmallCircle;": "\u2218",
-    "Sopf;": "\U0001d54a",
-    "Sqrt;": "\u221a",
-    "Square;": "\u25a1",
-    "SquareIntersection;": "\u2293",
-    "SquareSubset;": "\u228f",
-    "SquareSubsetEqual;": "\u2291",
-    "SquareSuperset;": "\u2290",
-    "SquareSupersetEqual;": "\u2292",
-    "SquareUnion;": "\u2294",
-    "Sscr;": "\U0001d4ae",
-    "Star;": "\u22c6",
-    "Sub;": "\u22d0",
-    "Subset;": "\u22d0",
-    "SubsetEqual;": "\u2286",
-    "Succeeds;": "\u227b",
-    "SucceedsEqual;": "\u2ab0",
-    "SucceedsSlantEqual;": "\u227d",
-    "SucceedsTilde;": "\u227f",
-    "SuchThat;": "\u220b",
-    "Sum;": "\u2211",
-    "Sup;": "\u22d1",
-    "Superset;": "\u2283",
-    "SupersetEqual;": "\u2287",
-    "Supset;": "\u22d1",
-    "THORN": "\xde",
-    "THORN;": "\xde",
-    "TRADE;": "\u2122",
-    "TSHcy;": "\u040b",
-    "TScy;": "\u0426",
-    "Tab;": "\t",
-    "Tau;": "\u03a4",
-    "Tcaron;": "\u0164",
-    "Tcedil;": "\u0162",
-    "Tcy;": "\u0422",
-    "Tfr;": "\U0001d517",
-    "Therefore;": "\u2234",
-    "Theta;": "\u0398",
-    "ThickSpace;": "\u205f\u200a",
-    "ThinSpace;": "\u2009",
-    "Tilde;": "\u223c",
-    "TildeEqual;": "\u2243",
-    "TildeFullEqual;": "\u2245",
-    "TildeTilde;": "\u2248",
-    "Topf;": "\U0001d54b",
-    "TripleDot;": "\u20db",
-    "Tscr;": "\U0001d4af",
-    "Tstrok;": "\u0166",
-    "Uacute": "\xda",
-    "Uacute;": "\xda",
-    "Uarr;": "\u219f",
-    "Uarrocir;": "\u2949",
-    "Ubrcy;": "\u040e",
-    "Ubreve;": "\u016c",
-    "Ucirc": "\xdb",
-    "Ucirc;": "\xdb",
-    "Ucy;": "\u0423",
-    "Udblac;": "\u0170",
-    "Ufr;": "\U0001d518",
-    "Ugrave": "\xd9",
-    "Ugrave;": "\xd9",
-    "Umacr;": "\u016a",
-    "UnderBar;": "_",
-    "UnderBrace;": "\u23df",
-    "UnderBracket;": "\u23b5",
-    "UnderParenthesis;": "\u23dd",
-    "Union;": "\u22c3",
-    "UnionPlus;": "\u228e",
-    "Uogon;": "\u0172",
-    "Uopf;": "\U0001d54c",
-    "UpArrow;": "\u2191",
-    "UpArrowBar;": "\u2912",
-    "UpArrowDownArrow;": "\u21c5",
-    "UpDownArrow;": "\u2195",
-    "UpEquilibrium;": "\u296e",
-    "UpTee;": "\u22a5",
-    "UpTeeArrow;": "\u21a5",
-    "Uparrow;": "\u21d1",
-    "Updownarrow;": "\u21d5",
-    "UpperLeftArrow;": "\u2196",
-    "UpperRightArrow;": "\u2197",
-    "Upsi;": "\u03d2",
-    "Upsilon;": "\u03a5",
-    "Uring;": "\u016e",
-    "Uscr;": "\U0001d4b0",
-    "Utilde;": "\u0168",
-    "Uuml": "\xdc",
-    "Uuml;": "\xdc",
-    "VDash;": "\u22ab",
-    "Vbar;": "\u2aeb",
-    "Vcy;": "\u0412",
-    "Vdash;": "\u22a9",
-    "Vdashl;": "\u2ae6",
-    "Vee;": "\u22c1",
-    "Verbar;": "\u2016",
-    "Vert;": "\u2016",
-    "VerticalBar;": "\u2223",
-    "VerticalLine;": "|",
-    "VerticalSeparator;": "\u2758",
-    "VerticalTilde;": "\u2240",
-    "VeryThinSpace;": "\u200a",
-    "Vfr;": "\U0001d519",
-    "Vopf;": "\U0001d54d",
-    "Vscr;": "\U0001d4b1",
-    "Vvdash;": "\u22aa",
-    "Wcirc;": "\u0174",
-    "Wedge;": "\u22c0",
-    "Wfr;": "\U0001d51a",
-    "Wopf;": "\U0001d54e",
-    "Wscr;": "\U0001d4b2",
-    "Xfr;": "\U0001d51b",
-    "Xi;": "\u039e",
-    "Xopf;": "\U0001d54f",
-    "Xscr;": "\U0001d4b3",
-    "YAcy;": "\u042f",
-    "YIcy;": "\u0407",
-    "YUcy;": "\u042e",
-    "Yacute": "\xdd",
-    "Yacute;": "\xdd",
-    "Ycirc;": "\u0176",
-    "Ycy;": "\u042b",
-    "Yfr;": "\U0001d51c",
-    "Yopf;": "\U0001d550",
-    "Yscr;": "\U0001d4b4",
-    "Yuml;": "\u0178",
-    "ZHcy;": "\u0416",
-    "Zacute;": "\u0179",
-    "Zcaron;": "\u017d",
-    "Zcy;": "\u0417",
-    "Zdot;": "\u017b",
-    "ZeroWidthSpace;": "\u200b",
-    "Zeta;": "\u0396",
-    "Zfr;": "\u2128",
-    "Zopf;": "\u2124",
-    "Zscr;": "\U0001d4b5",
-    "aacute": "\xe1",
-    "aacute;": "\xe1",
-    "abreve;": "\u0103",
-    "ac;": "\u223e",
-    "acE;": "\u223e\u0333",
-    "acd;": "\u223f",
-    "acirc": "\xe2",
-    "acirc;": "\xe2",
-    "acute": "\xb4",
-    "acute;": "\xb4",
-    "acy;": "\u0430",
-    "aelig": "\xe6",
-    "aelig;": "\xe6",
-    "af;": "\u2061",
-    "afr;": "\U0001d51e",
-    "agrave": "\xe0",
-    "agrave;": "\xe0",
-    "alefsym;": "\u2135",
-    "aleph;": "\u2135",
-    "alpha;": "\u03b1",
-    "amacr;": "\u0101",
-    "amalg;": "\u2a3f",
-    "amp": "&",
-    "amp;": "&",
-    "and;": "\u2227",
-    "andand;": "\u2a55",
-    "andd;": "\u2a5c",
-    "andslope;": "\u2a58",
-    "andv;": "\u2a5a",
-    "ang;": "\u2220",
-    "ange;": "\u29a4",
-    "angle;": "\u2220",
-    "angmsd;": "\u2221",
-    "angmsdaa;": "\u29a8",
-    "angmsdab;": "\u29a9",
-    "angmsdac;": "\u29aa",
-    "angmsdad;": "\u29ab",
-    "angmsdae;": "\u29ac",
-    "angmsdaf;": "\u29ad",
-    "angmsdag;": "\u29ae",
-    "angmsdah;": "\u29af",
-    "angrt;": "\u221f",
-    "angrtvb;": "\u22be",
-    "angrtvbd;": "\u299d",
-    "angsph;": "\u2222",
-    "angst;": "\xc5",
-    "angzarr;": "\u237c",
-    "aogon;": "\u0105",
-    "aopf;": "\U0001d552",
-    "ap;": "\u2248",
-    "apE;": "\u2a70",
-    "apacir;": "\u2a6f",
-    "ape;": "\u224a",
-    "apid;": "\u224b",
-    "apos;": "'",
-    "approx;": "\u2248",
-    "approxeq;": "\u224a",
-    "aring": "\xe5",
-    "aring;": "\xe5",
-    "ascr;": "\U0001d4b6",
-    "ast;": "*",
-    "asymp;": "\u2248",
-    "asympeq;": "\u224d",
-    "atilde": "\xe3",
-    "atilde;": "\xe3",
-    "auml": "\xe4",
-    "auml;": "\xe4",
-    "awconint;": "\u2233",
-    "awint;": "\u2a11",
-    "bNot;": "\u2aed",
-    "backcong;": "\u224c",
-    "backepsilon;": "\u03f6",
-    "backprime;": "\u2035",
-    "backsim;": "\u223d",
-    "backsimeq;": "\u22cd",
-    "barvee;": "\u22bd",
-    "barwed;": "\u2305",
-    "barwedge;": "\u2305",
-    "bbrk;": "\u23b5",
-    "bbrktbrk;": "\u23b6",
-    "bcong;": "\u224c",
-    "bcy;": "\u0431",
-    "bdquo;": "\u201e",
-    "becaus;": "\u2235",
-    "because;": "\u2235",
-    "bemptyv;": "\u29b0",
-    "bepsi;": "\u03f6",
-    "bernou;": "\u212c",
-    "beta;": "\u03b2",
-    "beth;": "\u2136",
-    "between;": "\u226c",
-    "bfr;": "\U0001d51f",
-    "bigcap;": "\u22c2",
-    "bigcirc;": "\u25ef",
-    "bigcup;": "\u22c3",
-    "bigodot;": "\u2a00",
-    "bigoplus;": "\u2a01",
-    "bigotimes;": "\u2a02",
-    "bigsqcup;": "\u2a06",
-    "bigstar;": "\u2605",
-    "bigtriangledown;": "\u25bd",
-    "bigtriangleup;": "\u25b3",
-    "biguplus;": "\u2a04",
-    "bigvee;": "\u22c1",
-    "bigwedge;": "\u22c0",
-    "bkarow;": "\u290d",
-    "blacklozenge;": "\u29eb",
-    "blacksquare;": "\u25aa",
-    "blacktriangle;": "\u25b4",
-    "blacktriangledown;": "\u25be",
-    "blacktriangleleft;": "\u25c2",
-    "blacktriangleright;": "\u25b8",
-    "blank;": "\u2423",
-    "blk12;": "\u2592",
-    "blk14;": "\u2591",
-    "blk34;": "\u2593",
-    "block;": "\u2588",
-    "bne;": "=\u20e5",
-    "bnequiv;": "\u2261\u20e5",
-    "bnot;": "\u2310",
-    "bopf;": "\U0001d553",
-    "bot;": "\u22a5",
-    "bottom;": "\u22a5",
-    "bowtie;": "\u22c8",
-    "boxDL;": "\u2557",
-    "boxDR;": "\u2554",
-    "boxDl;": "\u2556",
-    "boxDr;": "\u2553",
-    "boxH;": "\u2550",
-    "boxHD;": "\u2566",
-    "boxHU;": "\u2569",
-    "boxHd;": "\u2564",
-    "boxHu;": "\u2567",
-    "boxUL;": "\u255d",
-    "boxUR;": "\u255a",
-    "boxUl;": "\u255c",
-    "boxUr;": "\u2559",
-    "boxV;": "\u2551",
-    "boxVH;": "\u256c",
-    "boxVL;": "\u2563",
-    "boxVR;": "\u2560",
-    "boxVh;": "\u256b",
-    "boxVl;": "\u2562",
-    "boxVr;": "\u255f",
-    "boxbox;": "\u29c9",
-    "boxdL;": "\u2555",
-    "boxdR;": "\u2552",
-    "boxdl;": "\u2510",
-    "boxdr;": "\u250c",
-    "boxh;": "\u2500",
-    "boxhD;": "\u2565",
-    "boxhU;": "\u2568",
-    "boxhd;": "\u252c",
-    "boxhu;": "\u2534",
-    "boxminus;": "\u229f",
-    "boxplus;": "\u229e",
-    "boxtimes;": "\u22a0",
-    "boxuL;": "\u255b",
-    "boxuR;": "\u2558",
-    "boxul;": "\u2518",
-    "boxur;": "\u2514",
-    "boxv;": "\u2502",
-    "boxvH;": "\u256a",
-    "boxvL;": "\u2561",
-    "boxvR;": "\u255e",
-    "boxvh;": "\u253c",
-    "boxvl;": "\u2524",
-    "boxvr;": "\u251c",
-    "bprime;": "\u2035",
-    "breve;": "\u02d8",
-    "brvbar": "\xa6",
-    "brvbar;": "\xa6",
-    "bscr;": "\U0001d4b7",
-    "bsemi;": "\u204f",
-    "bsim;": "\u223d",
-    "bsime;": "\u22cd",
-    "bsol;": "\\",
-    "bsolb;": "\u29c5",
-    "bsolhsub;": "\u27c8",
-    "bull;": "\u2022",
-    "bullet;": "\u2022",
-    "bump;": "\u224e",
-    "bumpE;": "\u2aae",
-    "bumpe;": "\u224f",
-    "bumpeq;": "\u224f",
-    "cacute;": "\u0107",
-    "cap;": "\u2229",
-    "capand;": "\u2a44",
-    "capbrcup;": "\u2a49",
-    "capcap;": "\u2a4b",
-    "capcup;": "\u2a47",
-    "capdot;": "\u2a40",
-    "caps;": "\u2229\ufe00",
-    "caret;": "\u2041",
-    "caron;": "\u02c7",
-    "ccaps;": "\u2a4d",
-    "ccaron;": "\u010d",
-    "ccedil": "\xe7",
-    "ccedil;": "\xe7",
-    "ccirc;": "\u0109",
-    "ccups;": "\u2a4c",
-    "ccupssm;": "\u2a50",
-    "cdot;": "\u010b",
-    "cedil": "\xb8",
-    "cedil;": "\xb8",
-    "cemptyv;": "\u29b2",
-    "cent": "\xa2",
-    "cent;": "\xa2",
-    "centerdot;": "\xb7",
-    "cfr;": "\U0001d520",
-    "chcy;": "\u0447",
-    "check;": "\u2713",
-    "checkmark;": "\u2713",
-    "chi;": "\u03c7",
-    "cir;": "\u25cb",
-    "cirE;": "\u29c3",
-    "circ;": "\u02c6",
-    "circeq;": "\u2257",
-    "circlearrowleft;": "\u21ba",
-    "circlearrowright;": "\u21bb",
-    "circledR;": "\xae",
-    "circledS;": "\u24c8",
-    "circledast;": "\u229b",
-    "circledcirc;": "\u229a",
-    "circleddash;": "\u229d",
-    "cire;": "\u2257",
-    "cirfnint;": "\u2a10",
-    "cirmid;": "\u2aef",
-    "cirscir;": "\u29c2",
-    "clubs;": "\u2663",
-    "clubsuit;": "\u2663",
-    "colon;": ":",
-    "colone;": "\u2254",
-    "coloneq;": "\u2254",
-    "comma;": ",",
-    "commat;": "@",
-    "comp;": "\u2201",
-    "compfn;": "\u2218",
-    "complement;": "\u2201",
-    "complexes;": "\u2102",
-    "cong;": "\u2245",
-    "congdot;": "\u2a6d",
-    "conint;": "\u222e",
-    "copf;": "\U0001d554",
-    "coprod;": "\u2210",
-    "copy": "\xa9",
-    "copy;": "\xa9",
-    "copysr;": "\u2117",
-    "crarr;": "\u21b5",
-    "cross;": "\u2717",
-    "cscr;": "\U0001d4b8",
-    "csub;": "\u2acf",
-    "csube;": "\u2ad1",
-    "csup;": "\u2ad0",
-    "csupe;": "\u2ad2",
-    "ctdot;": "\u22ef",
-    "cudarrl;": "\u2938",
-    "cudarrr;": "\u2935",
-    "cuepr;": "\u22de",
-    "cuesc;": "\u22df",
-    "cularr;": "\u21b6",
-    "cularrp;": "\u293d",
-    "cup;": "\u222a",
-    "cupbrcap;": "\u2a48",
-    "cupcap;": "\u2a46",
-    "cupcup;": "\u2a4a",
-    "cupdot;": "\u228d",
-    "cupor;": "\u2a45",
-    "cups;": "\u222a\ufe00",
-    "curarr;": "\u21b7",
-    "curarrm;": "\u293c",
-    "curlyeqprec;": "\u22de",
-    "curlyeqsucc;": "\u22df",
-    "curlyvee;": "\u22ce",
-    "curlywedge;": "\u22cf",
-    "curren": "\xa4",
-    "curren;": "\xa4",
-    "curvearrowleft;": "\u21b6",
-    "curvearrowright;": "\u21b7",
-    "cuvee;": "\u22ce",
-    "cuwed;": "\u22cf",
-    "cwconint;": "\u2232",
-    "cwint;": "\u2231",
-    "cylcty;": "\u232d",
-    "dArr;": "\u21d3",
-    "dHar;": "\u2965",
-    "dagger;": "\u2020",
-    "daleth;": "\u2138",
-    "darr;": "\u2193",
-    "dash;": "\u2010",
-    "dashv;": "\u22a3",
-    "dbkarow;": "\u290f",
-    "dblac;": "\u02dd",
-    "dcaron;": "\u010f",
-    "dcy;": "\u0434",
-    "dd;": "\u2146",
-    "ddagger;": "\u2021",
-    "ddarr;": "\u21ca",
-    "ddotseq;": "\u2a77",
-    "deg": "\xb0",
-    "deg;": "\xb0",
-    "delta;": "\u03b4",
-    "demptyv;": "\u29b1",
-    "dfisht;": "\u297f",
-    "dfr;": "\U0001d521",
-    "dharl;": "\u21c3",
-    "dharr;": "\u21c2",
-    "diam;": "\u22c4",
-    "diamond;": "\u22c4",
-    "diamondsuit;": "\u2666",
-    "diams;": "\u2666",
-    "die;": "\xa8",
-    "digamma;": "\u03dd",
-    "disin;": "\u22f2",
-    "div;": "\xf7",
-    "divide": "\xf7",
-    "divide;": "\xf7",
-    "divideontimes;": "\u22c7",
-    "divonx;": "\u22c7",
-    "djcy;": "\u0452",
-    "dlcorn;": "\u231e",
-    "dlcrop;": "\u230d",
-    "dollar;": "$",
-    "dopf;": "\U0001d555",
-    "dot;": "\u02d9",
-    "doteq;": "\u2250",
-    "doteqdot;": "\u2251",
-    "dotminus;": "\u2238",
-    "dotplus;": "\u2214",
-    "dotsquare;": "\u22a1",
-    "doublebarwedge;": "\u2306",
-    "downarrow;": "\u2193",
-    "downdownarrows;": "\u21ca",
-    "downharpoonleft;": "\u21c3",
-    "downharpoonright;": "\u21c2",
-    "drbkarow;": "\u2910",
-    "drcorn;": "\u231f",
-    "drcrop;": "\u230c",
-    "dscr;": "\U0001d4b9",
-    "dscy;": "\u0455",
-    "dsol;": "\u29f6",
-    "dstrok;": "\u0111",
-    "dtdot;": "\u22f1",
-    "dtri;": "\u25bf",
-    "dtrif;": "\u25be",
-    "duarr;": "\u21f5",
-    "duhar;": "\u296f",
-    "dwangle;": "\u29a6",
-    "dzcy;": "\u045f",
-    "dzigrarr;": "\u27ff",
-    "eDDot;": "\u2a77",
-    "eDot;": "\u2251",
-    "eacute": "\xe9",
-    "eacute;": "\xe9",
-    "easter;": "\u2a6e",
-    "ecaron;": "\u011b",
-    "ecir;": "\u2256",
-    "ecirc": "\xea",
-    "ecirc;": "\xea",
-    "ecolon;": "\u2255",
-    "ecy;": "\u044d",
-    "edot;": "\u0117",
-    "ee;": "\u2147",
-    "efDot;": "\u2252",
-    "efr;": "\U0001d522",
-    "eg;": "\u2a9a",
-    "egrave": "\xe8",
-    "egrave;": "\xe8",
-    "egs;": "\u2a96",
-    "egsdot;": "\u2a98",
-    "el;": "\u2a99",
-    "elinters;": "\u23e7",
-    "ell;": "\u2113",
-    "els;": "\u2a95",
-    "elsdot;": "\u2a97",
-    "emacr;": "\u0113",
-    "empty;": "\u2205",
-    "emptyset;": "\u2205",
-    "emptyv;": "\u2205",
-    "emsp13;": "\u2004",
-    "emsp14;": "\u2005",
-    "emsp;": "\u2003",
-    "eng;": "\u014b",
-    "ensp;": "\u2002",
-    "eogon;": "\u0119",
-    "eopf;": "\U0001d556",
-    "epar;": "\u22d5",
-    "eparsl;": "\u29e3",
-    "eplus;": "\u2a71",
-    "epsi;": "\u03b5",
-    "epsilon;": "\u03b5",
-    "epsiv;": "\u03f5",
-    "eqcirc;": "\u2256",
-    "eqcolon;": "\u2255",
-    "eqsim;": "\u2242",
-    "eqslantgtr;": "\u2a96",
-    "eqslantless;": "\u2a95",
-    "equals;": "=",
-    "equest;": "\u225f",
-    "equiv;": "\u2261",
-    "equivDD;": "\u2a78",
-    "eqvparsl;": "\u29e5",
-    "erDot;": "\u2253",
-    "erarr;": "\u2971",
-    "escr;": "\u212f",
-    "esdot;": "\u2250",
-    "esim;": "\u2242",
-    "eta;": "\u03b7",
-    "eth": "\xf0",
-    "eth;": "\xf0",
-    "euml": "\xeb",
-    "euml;": "\xeb",
-    "euro;": "\u20ac",
-    "excl;": "!",
-    "exist;": "\u2203",
-    "expectation;": "\u2130",
-    "exponentiale;": "\u2147",
-    "fallingdotseq;": "\u2252",
-    "fcy;": "\u0444",
-    "female;": "\u2640",
-    "ffilig;": "\ufb03",
-    "fflig;": "\ufb00",
-    "ffllig;": "\ufb04",
-    "ffr;": "\U0001d523",
-    "filig;": "\ufb01",
-    "fjlig;": "fj",
-    "flat;": "\u266d",
-    "fllig;": "\ufb02",
-    "fltns;": "\u25b1",
-    "fnof;": "\u0192",
-    "fopf;": "\U0001d557",
-    "forall;": "\u2200",
-    "fork;": "\u22d4",
-    "forkv;": "\u2ad9",
-    "fpartint;": "\u2a0d",
-    "frac12": "\xbd",
-    "frac12;": "\xbd",
-    "frac13;": "\u2153",
-    "frac14": "\xbc",
-    "frac14;": "\xbc",
-    "frac15;": "\u2155",
-    "frac16;": "\u2159",
-    "frac18;": "\u215b",
-    "frac23;": "\u2154",
-    "frac25;": "\u2156",
-    "frac34": "\xbe",
-    "frac34;": "\xbe",
-    "frac35;": "\u2157",
-    "frac38;": "\u215c",
-    "frac45;": "\u2158",
-    "frac56;": "\u215a",
-    "frac58;": "\u215d",
-    "frac78;": "\u215e",
-    "frasl;": "\u2044",
-    "frown;": "\u2322",
-    "fscr;": "\U0001d4bb",
-    "gE;": "\u2267",
-    "gEl;": "\u2a8c",
-    "gacute;": "\u01f5",
-    "gamma;": "\u03b3",
-    "gammad;": "\u03dd",
-    "gap;": "\u2a86",
-    "gbreve;": "\u011f",
-    "gcirc;": "\u011d",
-    "gcy;": "\u0433",
-    "gdot;": "\u0121",
-    "ge;": "\u2265",
-    "gel;": "\u22db",
-    "geq;": "\u2265",
-    "geqq;": "\u2267",
-    "geqslant;": "\u2a7e",
-    "ges;": "\u2a7e",
-    "gescc;": "\u2aa9",
-    "gesdot;": "\u2a80",
-    "gesdoto;": "\u2a82",
-    "gesdotol;": "\u2a84",
-    "gesl;": "\u22db\ufe00",
-    "gesles;": "\u2a94",
-    "gfr;": "\U0001d524",
-    "gg;": "\u226b",
-    "ggg;": "\u22d9",
-    "gimel;": "\u2137",
-    "gjcy;": "\u0453",
-    "gl;": "\u2277",
-    "glE;": "\u2a92",
-    "gla;": "\u2aa5",
-    "glj;": "\u2aa4",
-    "gnE;": "\u2269",
-    "gnap;": "\u2a8a",
-    "gnapprox;": "\u2a8a",
-    "gne;": "\u2a88",
-    "gneq;": "\u2a88",
-    "gneqq;": "\u2269",
-    "gnsim;": "\u22e7",
-    "gopf;": "\U0001d558",
-    "grave;": "`",
-    "gscr;": "\u210a",
-    "gsim;": "\u2273",
-    "gsime;": "\u2a8e",
-    "gsiml;": "\u2a90",
-    "gt": ">",
-    "gt;": ">",
-    "gtcc;": "\u2aa7",
-    "gtcir;": "\u2a7a",
-    "gtdot;": "\u22d7",
-    "gtlPar;": "\u2995",
-    "gtquest;": "\u2a7c",
-    "gtrapprox;": "\u2a86",
-    "gtrarr;": "\u2978",
-    "gtrdot;": "\u22d7",
-    "gtreqless;": "\u22db",
-    "gtreqqless;": "\u2a8c",
-    "gtrless;": "\u2277",
-    "gtrsim;": "\u2273",
-    "gvertneqq;": "\u2269\ufe00",
-    "gvnE;": "\u2269\ufe00",
-    "hArr;": "\u21d4",
-    "hairsp;": "\u200a",
-    "half;": "\xbd",
-    "hamilt;": "\u210b",
-    "hardcy;": "\u044a",
-    "harr;": "\u2194",
-    "harrcir;": "\u2948",
-    "harrw;": "\u21ad",
-    "hbar;": "\u210f",
-    "hcirc;": "\u0125",
-    "hearts;": "\u2665",
-    "heartsuit;": "\u2665",
-    "hellip;": "\u2026",
-    "hercon;": "\u22b9",
-    "hfr;": "\U0001d525",
-    "hksearow;": "\u2925",
-    "hkswarow;": "\u2926",
-    "hoarr;": "\u21ff",
-    "homtht;": "\u223b",
-    "hookleftarrow;": "\u21a9",
-    "hookrightarrow;": "\u21aa",
-    "hopf;": "\U0001d559",
-    "horbar;": "\u2015",
-    "hscr;": "\U0001d4bd",
-    "hslash;": "\u210f",
-    "hstrok;": "\u0127",
-    "hybull;": "\u2043",
-    "hyphen;": "\u2010",
-    "iacute": "\xed",
-    "iacute;": "\xed",
-    "ic;": "\u2063",
-    "icirc": "\xee",
-    "icirc;": "\xee",
-    "icy;": "\u0438",
-    "iecy;": "\u0435",
-    "iexcl": "\xa1",
-    "iexcl;": "\xa1",
-    "iff;": "\u21d4",
-    "ifr;": "\U0001d526",
-    "igrave": "\xec",
-    "igrave;": "\xec",
-    "ii;": "\u2148",
-    "iiiint;": "\u2a0c",
-    "iiint;": "\u222d",
-    "iinfin;": "\u29dc",
-    "iiota;": "\u2129",
-    "ijlig;": "\u0133",
-    "imacr;": "\u012b",
-    "image;": "\u2111",
-    "imagline;": "\u2110",
-    "imagpart;": "\u2111",
-    "imath;": "\u0131",
-    "imof;": "\u22b7",
-    "imped;": "\u01b5",
-    "in;": "\u2208",
-    "incare;": "\u2105",
-    "infin;": "\u221e",
-    "infintie;": "\u29dd",
-    "inodot;": "\u0131",
-    "int;": "\u222b",
-    "intcal;": "\u22ba",
-    "integers;": "\u2124",
-    "intercal;": "\u22ba",
-    "intlarhk;": "\u2a17",
-    "intprod;": "\u2a3c",
-    "iocy;": "\u0451",
-    "iogon;": "\u012f",
-    "iopf;": "\U0001d55a",
-    "iota;": "\u03b9",
-    "iprod;": "\u2a3c",
-    "iquest": "\xbf",
-    "iquest;": "\xbf",
-    "iscr;": "\U0001d4be",
-    "isin;": "\u2208",
-    "isinE;": "\u22f9",
-    "isindot;": "\u22f5",
-    "isins;": "\u22f4",
-    "isinsv;": "\u22f3",
-    "isinv;": "\u2208",
-    "it;": "\u2062",
-    "itilde;": "\u0129",
-    "iukcy;": "\u0456",
-    "iuml": "\xef",
-    "iuml;": "\xef",
-    "jcirc;": "\u0135",
-    "jcy;": "\u0439",
-    "jfr;": "\U0001d527",
-    "jmath;": "\u0237",
-    "jopf;": "\U0001d55b",
-    "jscr;": "\U0001d4bf",
-    "jsercy;": "\u0458",
-    "jukcy;": "\u0454",
-    "kappa;": "\u03ba",
-    "kappav;": "\u03f0",
-    "kcedil;": "\u0137",
-    "kcy;": "\u043a",
-    "kfr;": "\U0001d528",
-    "kgreen;": "\u0138",
-    "khcy;": "\u0445",
-    "kjcy;": "\u045c",
-    "kopf;": "\U0001d55c",
-    "kscr;": "\U0001d4c0",
-    "lAarr;": "\u21da",
-    "lArr;": "\u21d0",
-    "lAtail;": "\u291b",
-    "lBarr;": "\u290e",
-    "lE;": "\u2266",
-    "lEg;": "\u2a8b",
-    "lHar;": "\u2962",
-    "lacute;": "\u013a",
-    "laemptyv;": "\u29b4",
-    "lagran;": "\u2112",
-    "lambda;": "\u03bb",
-    "lang;": "\u27e8",
-    "langd;": "\u2991",
-    "langle;": "\u27e8",
-    "lap;": "\u2a85",
-    "laquo": "\xab",
-    "laquo;": "\xab",
-    "larr;": "\u2190",
-    "larrb;": "\u21e4",
-    "larrbfs;": "\u291f",
-    "larrfs;": "\u291d",
-    "larrhk;": "\u21a9",
-    "larrlp;": "\u21ab",
-    "larrpl;": "\u2939",
-    "larrsim;": "\u2973",
-    "larrtl;": "\u21a2",
-    "lat;": "\u2aab",
-    "latail;": "\u2919",
-    "late;": "\u2aad",
-    "lates;": "\u2aad\ufe00",
-    "lbarr;": "\u290c",
-    "lbbrk;": "\u2772",
-    "lbrace;": "{",
-    "lbrack;": "[",
-    "lbrke;": "\u298b",
-    "lbrksld;": "\u298f",
-    "lbrkslu;": "\u298d",
-    "lcaron;": "\u013e",
-    "lcedil;": "\u013c",
-    "lceil;": "\u2308",
-    "lcub;": "{",
-    "lcy;": "\u043b",
-    "ldca;": "\u2936",
-    "ldquo;": "\u201c",
-    "ldquor;": "\u201e",
-    "ldrdhar;": "\u2967",
-    "ldrushar;": "\u294b",
-    "ldsh;": "\u21b2",
-    "le;": "\u2264",
-    "leftarrow;": "\u2190",
-    "leftarrowtail;": "\u21a2",
-    "leftharpoondown;": "\u21bd",
-    "leftharpoonup;": "\u21bc",
-    "leftleftarrows;": "\u21c7",
-    "leftrightarrow;": "\u2194",
-    "leftrightarrows;": "\u21c6",
-    "leftrightharpoons;": "\u21cb",
-    "leftrightsquigarrow;": "\u21ad",
-    "leftthreetimes;": "\u22cb",
-    "leg;": "\u22da",
-    "leq;": "\u2264",
-    "leqq;": "\u2266",
-    "leqslant;": "\u2a7d",
-    "les;": "\u2a7d",
-    "lescc;": "\u2aa8",
-    "lesdot;": "\u2a7f",
-    "lesdoto;": "\u2a81",
-    "lesdotor;": "\u2a83",
-    "lesg;": "\u22da\ufe00",
-    "lesges;": "\u2a93",
-    "lessapprox;": "\u2a85",
-    "lessdot;": "\u22d6",
-    "lesseqgtr;": "\u22da",
-    "lesseqqgtr;": "\u2a8b",
-    "lessgtr;": "\u2276",
-    "lesssim;": "\u2272",
-    "lfisht;": "\u297c",
-    "lfloor;": "\u230a",
-    "lfr;": "\U0001d529",
-    "lg;": "\u2276",
-    "lgE;": "\u2a91",
-    "lhard;": "\u21bd",
-    "lharu;": "\u21bc",
-    "lharul;": "\u296a",
-    "lhblk;": "\u2584",
-    "ljcy;": "\u0459",
-    "ll;": "\u226a",
-    "llarr;": "\u21c7",
-    "llcorner;": "\u231e",
-    "llhard;": "\u296b",
-    "lltri;": "\u25fa",
-    "lmidot;": "\u0140",
-    "lmoust;": "\u23b0",
-    "lmoustache;": "\u23b0",
-    "lnE;": "\u2268",
-    "lnap;": "\u2a89",
-    "lnapprox;": "\u2a89",
-    "lne;": "\u2a87",
-    "lneq;": "\u2a87",
-    "lneqq;": "\u2268",
-    "lnsim;": "\u22e6",
-    "loang;": "\u27ec",
-    "loarr;": "\u21fd",
-    "lobrk;": "\u27e6",
-    "longleftarrow;": "\u27f5",
-    "longleftrightarrow;": "\u27f7",
-    "longmapsto;": "\u27fc",
-    "longrightarrow;": "\u27f6",
-    "looparrowleft;": "\u21ab",
-    "looparrowright;": "\u21ac",
-    "lopar;": "\u2985",
-    "lopf;": "\U0001d55d",
-    "loplus;": "\u2a2d",
-    "lotimes;": "\u2a34",
-    "lowast;": "\u2217",
-    "lowbar;": "_",
-    "loz;": "\u25ca",
-    "lozenge;": "\u25ca",
-    "lozf;": "\u29eb",
-    "lpar;": "(",
-    "lparlt;": "\u2993",
-    "lrarr;": "\u21c6",
-    "lrcorner;": "\u231f",
-    "lrhar;": "\u21cb",
-    "lrhard;": "\u296d",
-    "lrm;": "\u200e",
-    "lrtri;": "\u22bf",
-    "lsaquo;": "\u2039",
-    "lscr;": "\U0001d4c1",
-    "lsh;": "\u21b0",
-    "lsim;": "\u2272",
-    "lsime;": "\u2a8d",
-    "lsimg;": "\u2a8f",
-    "lsqb;": "[",
-    "lsquo;": "\u2018",
-    "lsquor;": "\u201a",
-    "lstrok;": "\u0142",
-    "lt": "<",
-    "lt;": "<",
-    "ltcc;": "\u2aa6",
-    "ltcir;": "\u2a79",
-    "ltdot;": "\u22d6",
-    "lthree;": "\u22cb",
-    "ltimes;": "\u22c9",
-    "ltlarr;": "\u2976",
-    "ltquest;": "\u2a7b",
-    "ltrPar;": "\u2996",
-    "ltri;": "\u25c3",
-    "ltrie;": "\u22b4",
-    "ltrif;": "\u25c2",
-    "lurdshar;": "\u294a",
-    "luruhar;": "\u2966",
-    "lvertneqq;": "\u2268\ufe00",
-    "lvnE;": "\u2268\ufe00",
-    "mDDot;": "\u223a",
-    "macr": "\xaf",
-    "macr;": "\xaf",
-    "male;": "\u2642",
-    "malt;": "\u2720",
-    "maltese;": "\u2720",
-    "map;": "\u21a6",
-    "mapsto;": "\u21a6",
-    "mapstodown;": "\u21a7",
-    "mapstoleft;": "\u21a4",
-    "mapstoup;": "\u21a5",
-    "marker;": "\u25ae",
-    "mcomma;": "\u2a29",
-    "mcy;": "\u043c",
-    "mdash;": "\u2014",
-    "measuredangle;": "\u2221",
-    "mfr;": "\U0001d52a",
-    "mho;": "\u2127",
-    "micro": "\xb5",
-    "micro;": "\xb5",
-    "mid;": "\u2223",
-    "midast;": "*",
-    "midcir;": "\u2af0",
-    "middot": "\xb7",
-    "middot;": "\xb7",
-    "minus;": "\u2212",
-    "minusb;": "\u229f",
-    "minusd;": "\u2238",
-    "minusdu;": "\u2a2a",
-    "mlcp;": "\u2adb",
-    "mldr;": "\u2026",
-    "mnplus;": "\u2213",
-    "models;": "\u22a7",
-    "mopf;": "\U0001d55e",
-    "mp;": "\u2213",
-    "mscr;": "\U0001d4c2",
-    "mstpos;": "\u223e",
-    "mu;": "\u03bc",
-    "multimap;": "\u22b8",
-    "mumap;": "\u22b8",
-    "nGg;": "\u22d9\u0338",
-    "nGt;": "\u226b\u20d2",
-    "nGtv;": "\u226b\u0338",
-    "nLeftarrow;": "\u21cd",
-    "nLeftrightarrow;": "\u21ce",
-    "nLl;": "\u22d8\u0338",
-    "nLt;": "\u226a\u20d2",
-    "nLtv;": "\u226a\u0338",
-    "nRightarrow;": "\u21cf",
-    "nVDash;": "\u22af",
-    "nVdash;": "\u22ae",
-    "nabla;": "\u2207",
-    "nacute;": "\u0144",
-    "nang;": "\u2220\u20d2",
-    "nap;": "\u2249",
-    "napE;": "\u2a70\u0338",
-    "napid;": "\u224b\u0338",
-    "napos;": "\u0149",
-    "napprox;": "\u2249",
-    "natur;": "\u266e",
-    "natural;": "\u266e",
-    "naturals;": "\u2115",
-    "nbsp": "\xa0",
-    "nbsp;": "\xa0",
-    "nbump;": "\u224e\u0338",
-    "nbumpe;": "\u224f\u0338",
-    "ncap;": "\u2a43",
-    "ncaron;": "\u0148",
-    "ncedil;": "\u0146",
-    "ncong;": "\u2247",
-    "ncongdot;": "\u2a6d\u0338",
-    "ncup;": "\u2a42",
-    "ncy;": "\u043d",
-    "ndash;": "\u2013",
-    "ne;": "\u2260",
-    "neArr;": "\u21d7",
-    "nearhk;": "\u2924",
-    "nearr;": "\u2197",
-    "nearrow;": "\u2197",
-    "nedot;": "\u2250\u0338",
-    "nequiv;": "\u2262",
-    "nesear;": "\u2928",
-    "nesim;": "\u2242\u0338",
-    "nexist;": "\u2204",
-    "nexists;": "\u2204",
-    "nfr;": "\U0001d52b",
-    "ngE;": "\u2267\u0338",
-    "nge;": "\u2271",
-    "ngeq;": "\u2271",
-    "ngeqq;": "\u2267\u0338",
-    "ngeqslant;": "\u2a7e\u0338",
-    "nges;": "\u2a7e\u0338",
-    "ngsim;": "\u2275",
-    "ngt;": "\u226f",
-    "ngtr;": "\u226f",
-    "nhArr;": "\u21ce",
-    "nharr;": "\u21ae",
-    "nhpar;": "\u2af2",
-    "ni;": "\u220b",
-    "nis;": "\u22fc",
-    "nisd;": "\u22fa",
-    "niv;": "\u220b",
-    "njcy;": "\u045a",
-    "nlArr;": "\u21cd",
-    "nlE;": "\u2266\u0338",
-    "nlarr;": "\u219a",
-    "nldr;": "\u2025",
-    "nle;": "\u2270",
-    "nleftarrow;": "\u219a",
-    "nleftrightarrow;": "\u21ae",
-    "nleq;": "\u2270",
-    "nleqq;": "\u2266\u0338",
-    "nleqslant;": "\u2a7d\u0338",
-    "nles;": "\u2a7d\u0338",
-    "nless;": "\u226e",
-    "nlsim;": "\u2274",
-    "nlt;": "\u226e",
-    "nltri;": "\u22ea",
-    "nltrie;": "\u22ec",
-    "nmid;": "\u2224",
-    "nopf;": "\U0001d55f",
-    "not": "\xac",
-    "not;": "\xac",
-    "notin;": "\u2209",
-    "notinE;": "\u22f9\u0338",
-    "notindot;": "\u22f5\u0338",
-    "notinva;": "\u2209",
-    "notinvb;": "\u22f7",
-    "notinvc;": "\u22f6",
-    "notni;": "\u220c",
-    "notniva;": "\u220c",
-    "notnivb;": "\u22fe",
-    "notnivc;": "\u22fd",
-    "npar;": "\u2226",
-    "nparallel;": "\u2226",
-    "nparsl;": "\u2afd\u20e5",
-    "npart;": "\u2202\u0338",
-    "npolint;": "\u2a14",
-    "npr;": "\u2280",
-    "nprcue;": "\u22e0",
-    "npre;": "\u2aaf\u0338",
-    "nprec;": "\u2280",
-    "npreceq;": "\u2aaf\u0338",
-    "nrArr;": "\u21cf",
-    "nrarr;": "\u219b",
-    "nrarrc;": "\u2933\u0338",
-    "nrarrw;": "\u219d\u0338",
-    "nrightarrow;": "\u219b",
-    "nrtri;": "\u22eb",
-    "nrtrie;": "\u22ed",
-    "nsc;": "\u2281",
-    "nsccue;": "\u22e1",
-    "nsce;": "\u2ab0\u0338",
-    "nscr;": "\U0001d4c3",
-    "nshortmid;": "\u2224",
-    "nshortparallel;": "\u2226",
-    "nsim;": "\u2241",
-    "nsime;": "\u2244",
-    "nsimeq;": "\u2244",
-    "nsmid;": "\u2224",
-    "nspar;": "\u2226",
-    "nsqsube;": "\u22e2",
-    "nsqsupe;": "\u22e3",
-    "nsub;": "\u2284",
-    "nsubE;": "\u2ac5\u0338",
-    "nsube;": "\u2288",
-    "nsubset;": "\u2282\u20d2",
-    "nsubseteq;": "\u2288",
-    "nsubseteqq;": "\u2ac5\u0338",
-    "nsucc;": "\u2281",
-    "nsucceq;": "\u2ab0\u0338",
-    "nsup;": "\u2285",
-    "nsupE;": "\u2ac6\u0338",
-    "nsupe;": "\u2289",
-    "nsupset;": "\u2283\u20d2",
-    "nsupseteq;": "\u2289",
-    "nsupseteqq;": "\u2ac6\u0338",
-    "ntgl;": "\u2279",
-    "ntilde": "\xf1",
-    "ntilde;": "\xf1",
-    "ntlg;": "\u2278",
-    "ntriangleleft;": "\u22ea",
-    "ntrianglelefteq;": "\u22ec",
-    "ntriangleright;": "\u22eb",
-    "ntrianglerighteq;": "\u22ed",
-    "nu;": "\u03bd",
-    "num;": "#",
-    "numero;": "\u2116",
-    "numsp;": "\u2007",
-    "nvDash;": "\u22ad",
-    "nvHarr;": "\u2904",
-    "nvap;": "\u224d\u20d2",
-    "nvdash;": "\u22ac",
-    "nvge;": "\u2265\u20d2",
-    "nvgt;": ">\u20d2",
-    "nvinfin;": "\u29de",
-    "nvlArr;": "\u2902",
-    "nvle;": "\u2264\u20d2",
-    "nvlt;": "<\u20d2",
-    "nvltrie;": "\u22b4\u20d2",
-    "nvrArr;": "\u2903",
-    "nvrtrie;": "\u22b5\u20d2",
-    "nvsim;": "\u223c\u20d2",
-    "nwArr;": "\u21d6",
-    "nwarhk;": "\u2923",
-    "nwarr;": "\u2196",
-    "nwarrow;": "\u2196",
-    "nwnear;": "\u2927",
-    "oS;": "\u24c8",
-    "oacute": "\xf3",
-    "oacute;": "\xf3",
-    "oast;": "\u229b",
-    "ocir;": "\u229a",
-    "ocirc": "\xf4",
-    "ocirc;": "\xf4",
-    "ocy;": "\u043e",
-    "odash;": "\u229d",
-    "odblac;": "\u0151",
-    "odiv;": "\u2a38",
-    "odot;": "\u2299",
-    "odsold;": "\u29bc",
-    "oelig;": "\u0153",
-    "ofcir;": "\u29bf",
-    "ofr;": "\U0001d52c",
-    "ogon;": "\u02db",
-    "ograve": "\xf2",
-    "ograve;": "\xf2",
-    "ogt;": "\u29c1",
-    "ohbar;": "\u29b5",
-    "ohm;": "\u03a9",
-    "oint;": "\u222e",
-    "olarr;": "\u21ba",
-    "olcir;": "\u29be",
-    "olcross;": "\u29bb",
-    "oline;": "\u203e",
-    "olt;": "\u29c0",
-    "omacr;": "\u014d",
-    "omega;": "\u03c9",
-    "omicron;": "\u03bf",
-    "omid;": "\u29b6",
-    "ominus;": "\u2296",
-    "oopf;": "\U0001d560",
-    "opar;": "\u29b7",
-    "operp;": "\u29b9",
-    "oplus;": "\u2295",
-    "or;": "\u2228",
-    "orarr;": "\u21bb",
-    "ord;": "\u2a5d",
-    "order;": "\u2134",
-    "orderof;": "\u2134",
-    "ordf": "\xaa",
-    "ordf;": "\xaa",
-    "ordm": "\xba",
-    "ordm;": "\xba",
-    "origof;": "\u22b6",
-    "oror;": "\u2a56",
-    "orslope;": "\u2a57",
-    "orv;": "\u2a5b",
-    "oscr;": "\u2134",
-    "oslash": "\xf8",
-    "oslash;": "\xf8",
-    "osol;": "\u2298",
-    "otilde": "\xf5",
-    "otilde;": "\xf5",
-    "otimes;": "\u2297",
-    "otimesas;": "\u2a36",
-    "ouml": "\xf6",
-    "ouml;": "\xf6",
-    "ovbar;": "\u233d",
-    "par;": "\u2225",
-    "para": "\xb6",
-    "para;": "\xb6",
-    "parallel;": "\u2225",
-    "parsim;": "\u2af3",
-    "parsl;": "\u2afd",
-    "part;": "\u2202",
-    "pcy;": "\u043f",
-    "percnt;": "%",
-    "period;": ".",
-    "permil;": "\u2030",
-    "perp;": "\u22a5",
-    "pertenk;": "\u2031",
-    "pfr;": "\U0001d52d",
-    "phi;": "\u03c6",
-    "phiv;": "\u03d5",
-    "phmmat;": "\u2133",
-    "phone;": "\u260e",
-    "pi;": "\u03c0",
-    "pitchfork;": "\u22d4",
-    "piv;": "\u03d6",
-    "planck;": "\u210f",
-    "planckh;": "\u210e",
-    "plankv;": "\u210f",
-    "plus;": "+",
-    "plusacir;": "\u2a23",
-    "plusb;": "\u229e",
-    "pluscir;": "\u2a22",
-    "plusdo;": "\u2214",
-    "plusdu;": "\u2a25",
-    "pluse;": "\u2a72",
-    "plusmn": "\xb1",
-    "plusmn;": "\xb1",
-    "plussim;": "\u2a26",
-    "plustwo;": "\u2a27",
-    "pm;": "\xb1",
-    "pointint;": "\u2a15",
-    "popf;": "\U0001d561",
-    "pound": "\xa3",
-    "pound;": "\xa3",
-    "pr;": "\u227a",
-    "prE;": "\u2ab3",
-    "prap;": "\u2ab7",
-    "prcue;": "\u227c",
-    "pre;": "\u2aaf",
-    "prec;": "\u227a",
-    "precapprox;": "\u2ab7",
-    "preccurlyeq;": "\u227c",
-    "preceq;": "\u2aaf",
-    "precnapprox;": "\u2ab9",
-    "precneqq;": "\u2ab5",
-    "precnsim;": "\u22e8",
-    "precsim;": "\u227e",
-    "prime;": "\u2032",
-    "primes;": "\u2119",
-    "prnE;": "\u2ab5",
-    "prnap;": "\u2ab9",
-    "prnsim;": "\u22e8",
-    "prod;": "\u220f",
-    "profalar;": "\u232e",
-    "profline;": "\u2312",
-    "profsurf;": "\u2313",
-    "prop;": "\u221d",
-    "propto;": "\u221d",
-    "prsim;": "\u227e",
-    "prurel;": "\u22b0",
-    "pscr;": "\U0001d4c5",
-    "psi;": "\u03c8",
-    "puncsp;": "\u2008",
-    "qfr;": "\U0001d52e",
-    "qint;": "\u2a0c",
-    "qopf;": "\U0001d562",
-    "qprime;": "\u2057",
-    "qscr;": "\U0001d4c6",
-    "quaternions;": "\u210d",
-    "quatint;": "\u2a16",
-    "quest;": "?",
-    "questeq;": "\u225f",
-    "quot": "\"",
-    "quot;": "\"",
-    "rAarr;": "\u21db",
-    "rArr;": "\u21d2",
-    "rAtail;": "\u291c",
-    "rBarr;": "\u290f",
-    "rHar;": "\u2964",
-    "race;": "\u223d\u0331",
-    "racute;": "\u0155",
-    "radic;": "\u221a",
-    "raemptyv;": "\u29b3",
-    "rang;": "\u27e9",
-    "rangd;": "\u2992",
-    "range;": "\u29a5",
-    "rangle;": "\u27e9",
-    "raquo": "\xbb",
-    "raquo;": "\xbb",
-    "rarr;": "\u2192",
-    "rarrap;": "\u2975",
-    "rarrb;": "\u21e5",
-    "rarrbfs;": "\u2920",
-    "rarrc;": "\u2933",
-    "rarrfs;": "\u291e",
-    "rarrhk;": "\u21aa",
-    "rarrlp;": "\u21ac",
-    "rarrpl;": "\u2945",
-    "rarrsim;": "\u2974",
-    "rarrtl;": "\u21a3",
-    "rarrw;": "\u219d",
-    "ratail;": "\u291a",
-    "ratio;": "\u2236",
-    "rationals;": "\u211a",
-    "rbarr;": "\u290d",
-    "rbbrk;": "\u2773",
-    "rbrace;": "}",
-    "rbrack;": "]",
-    "rbrke;": "\u298c",
-    "rbrksld;": "\u298e",
-    "rbrkslu;": "\u2990",
-    "rcaron;": "\u0159",
-    "rcedil;": "\u0157",
-    "rceil;": "\u2309",
-    "rcub;": "}",
-    "rcy;": "\u0440",
-    "rdca;": "\u2937",
-    "rdldhar;": "\u2969",
-    "rdquo;": "\u201d",
-    "rdquor;": "\u201d",
-    "rdsh;": "\u21b3",
-    "real;": "\u211c",
-    "realine;": "\u211b",
-    "realpart;": "\u211c",
-    "reals;": "\u211d",
-    "rect;": "\u25ad",
-    "reg": "\xae",
-    "reg;": "\xae",
-    "rfisht;": "\u297d",
-    "rfloor;": "\u230b",
-    "rfr;": "\U0001d52f",
-    "rhard;": "\u21c1",
-    "rharu;": "\u21c0",
-    "rharul;": "\u296c",
-    "rho;": "\u03c1",
-    "rhov;": "\u03f1",
-    "rightarrow;": "\u2192",
-    "rightarrowtail;": "\u21a3",
-    "rightharpoondown;": "\u21c1",
-    "rightharpoonup;": "\u21c0",
-    "rightleftarrows;": "\u21c4",
-    "rightleftharpoons;": "\u21cc",
-    "rightrightarrows;": "\u21c9",
-    "rightsquigarrow;": "\u219d",
-    "rightthreetimes;": "\u22cc",
-    "ring;": "\u02da",
-    "risingdotseq;": "\u2253",
-    "rlarr;": "\u21c4",
-    "rlhar;": "\u21cc",
-    "rlm;": "\u200f",
-    "rmoust;": "\u23b1",
-    "rmoustache;": "\u23b1",
-    "rnmid;": "\u2aee",
-    "roang;": "\u27ed",
-    "roarr;": "\u21fe",
-    "robrk;": "\u27e7",
-    "ropar;": "\u2986",
-    "ropf;": "\U0001d563",
-    "roplus;": "\u2a2e",
-    "rotimes;": "\u2a35",
-    "rpar;": ")",
-    "rpargt;": "\u2994",
-    "rppolint;": "\u2a12",
-    "rrarr;": "\u21c9",
-    "rsaquo;": "\u203a",
-    "rscr;": "\U0001d4c7",
-    "rsh;": "\u21b1",
-    "rsqb;": "]",
-    "rsquo;": "\u2019",
-    "rsquor;": "\u2019",
-    "rthree;": "\u22cc",
-    "rtimes;": "\u22ca",
-    "rtri;": "\u25b9",
-    "rtrie;": "\u22b5",
-    "rtrif;": "\u25b8",
-    "rtriltri;": "\u29ce",
-    "ruluhar;": "\u2968",
-    "rx;": "\u211e",
-    "sacute;": "\u015b",
-    "sbquo;": "\u201a",
-    "sc;": "\u227b",
-    "scE;": "\u2ab4",
-    "scap;": "\u2ab8",
-    "scaron;": "\u0161",
-    "sccue;": "\u227d",
-    "sce;": "\u2ab0",
-    "scedil;": "\u015f",
-    "scirc;": "\u015d",
-    "scnE;": "\u2ab6",
-    "scnap;": "\u2aba",
-    "scnsim;": "\u22e9",
-    "scpolint;": "\u2a13",
-    "scsim;": "\u227f",
-    "scy;": "\u0441",
-    "sdot;": "\u22c5",
-    "sdotb;": "\u22a1",
-    "sdote;": "\u2a66",
-    "seArr;": "\u21d8",
-    "searhk;": "\u2925",
-    "searr;": "\u2198",
-    "searrow;": "\u2198",
-    "sect": "\xa7",
-    "sect;": "\xa7",
-    "semi;": ";",
-    "seswar;": "\u2929",
-    "setminus;": "\u2216",
-    "setmn;": "\u2216",
-    "sext;": "\u2736",
-    "sfr;": "\U0001d530",
-    "sfrown;": "\u2322",
-    "sharp;": "\u266f",
-    "shchcy;": "\u0449",
-    "shcy;": "\u0448",
-    "shortmid;": "\u2223",
-    "shortparallel;": "\u2225",
-    "shy": "\xad",
-    "shy;": "\xad",
-    "sigma;": "\u03c3",
-    "sigmaf;": "\u03c2",
-    "sigmav;": "\u03c2",
-    "sim;": "\u223c",
-    "simdot;": "\u2a6a",
-    "sime;": "\u2243",
-    "simeq;": "\u2243",
-    "simg;": "\u2a9e",
-    "simgE;": "\u2aa0",
-    "siml;": "\u2a9d",
-    "simlE;": "\u2a9f",
-    "simne;": "\u2246",
-    "simplus;": "\u2a24",
-    "simrarr;": "\u2972",
-    "slarr;": "\u2190",
-    "smallsetminus;": "\u2216",
-    "smashp;": "\u2a33",
-    "smeparsl;": "\u29e4",
-    "smid;": "\u2223",
-    "smile;": "\u2323",
-    "smt;": "\u2aaa",
-    "smte;": "\u2aac",
-    "smtes;": "\u2aac\ufe00",
-    "softcy;": "\u044c",
-    "sol;": "/",
-    "solb;": "\u29c4",
-    "solbar;": "\u233f",
-    "sopf;": "\U0001d564",
-    "spades;": "\u2660",
-    "spadesuit;": "\u2660",
-    "spar;": "\u2225",
-    "sqcap;": "\u2293",
-    "sqcaps;": "\u2293\ufe00",
-    "sqcup;": "\u2294",
-    "sqcups;": "\u2294\ufe00",
-    "sqsub;": "\u228f",
-    "sqsube;": "\u2291",
-    "sqsubset;": "\u228f",
-    "sqsubseteq;": "\u2291",
-    "sqsup;": "\u2290",
-    "sqsupe;": "\u2292",
-    "sqsupset;": "\u2290",
-    "sqsupseteq;": "\u2292",
-    "squ;": "\u25a1",
-    "square;": "\u25a1",
-    "squarf;": "\u25aa",
-    "squf;": "\u25aa",
-    "srarr;": "\u2192",
-    "sscr;": "\U0001d4c8",
-    "ssetmn;": "\u2216",
-    "ssmile;": "\u2323",
-    "sstarf;": "\u22c6",
-    "star;": "\u2606",
-    "starf;": "\u2605",
-    "straightepsilon;": "\u03f5",
-    "straightphi;": "\u03d5",
-    "strns;": "\xaf",
-    "sub;": "\u2282",
-    "subE;": "\u2ac5",
-    "subdot;": "\u2abd",
-    "sube;": "\u2286",
-    "subedot;": "\u2ac3",
-    "submult;": "\u2ac1",
-    "subnE;": "\u2acb",
-    "subne;": "\u228a",
-    "subplus;": "\u2abf",
-    "subrarr;": "\u2979",
-    "subset;": "\u2282",
-    "subseteq;": "\u2286",
-    "subseteqq;": "\u2ac5",
-    "subsetneq;": "\u228a",
-    "subsetneqq;": "\u2acb",
-    "subsim;": "\u2ac7",
-    "subsub;": "\u2ad5",
-    "subsup;": "\u2ad3",
-    "succ;": "\u227b",
-    "succapprox;": "\u2ab8",
-    "succcurlyeq;": "\u227d",
-    "succeq;": "\u2ab0",
-    "succnapprox;": "\u2aba",
-    "succneqq;": "\u2ab6",
-    "succnsim;": "\u22e9",
-    "succsim;": "\u227f",
-    "sum;": "\u2211",
-    "sung;": "\u266a",
-    "sup1": "\xb9",
-    "sup1;": "\xb9",
-    "sup2": "\xb2",
-    "sup2;": "\xb2",
-    "sup3": "\xb3",
-    "sup3;": "\xb3",
-    "sup;": "\u2283",
-    "supE;": "\u2ac6",
-    "supdot;": "\u2abe",
-    "supdsub;": "\u2ad8",
-    "supe;": "\u2287",
-    "supedot;": "\u2ac4",
-    "suphsol;": "\u27c9",
-    "suphsub;": "\u2ad7",
-    "suplarr;": "\u297b",
-    "supmult;": "\u2ac2",
-    "supnE;": "\u2acc",
-    "supne;": "\u228b",
-    "supplus;": "\u2ac0",
-    "supset;": "\u2283",
-    "supseteq;": "\u2287",
-    "supseteqq;": "\u2ac6",
-    "supsetneq;": "\u228b",
-    "supsetneqq;": "\u2acc",
-    "supsim;": "\u2ac8",
-    "supsub;": "\u2ad4",
-    "supsup;": "\u2ad6",
-    "swArr;": "\u21d9",
-    "swarhk;": "\u2926",
-    "swarr;": "\u2199",
-    "swarrow;": "\u2199",
-    "swnwar;": "\u292a",
-    "szlig": "\xdf",
-    "szlig;": "\xdf",
-    "target;": "\u2316",
-    "tau;": "\u03c4",
-    "tbrk;": "\u23b4",
-    "tcaron;": "\u0165",
-    "tcedil;": "\u0163",
-    "tcy;": "\u0442",
-    "tdot;": "\u20db",
-    "telrec;": "\u2315",
-    "tfr;": "\U0001d531",
-    "there4;": "\u2234",
-    "therefore;": "\u2234",
-    "theta;": "\u03b8",
-    "thetasym;": "\u03d1",
-    "thetav;": "\u03d1",
-    "thickapprox;": "\u2248",
-    "thicksim;": "\u223c",
-    "thinsp;": "\u2009",
-    "thkap;": "\u2248",
-    "thksim;": "\u223c",
-    "thorn": "\xfe",
-    "thorn;": "\xfe",
-    "tilde;": "\u02dc",
-    "times": "\xd7",
-    "times;": "\xd7",
-    "timesb;": "\u22a0",
-    "timesbar;": "\u2a31",
-    "timesd;": "\u2a30",
-    "tint;": "\u222d",
-    "toea;": "\u2928",
-    "top;": "\u22a4",
-    "topbot;": "\u2336",
-    "topcir;": "\u2af1",
-    "topf;": "\U0001d565",
-    "topfork;": "\u2ada",
-    "tosa;": "\u2929",
-    "tprime;": "\u2034",
-    "trade;": "\u2122",
-    "triangle;": "\u25b5",
-    "triangledown;": "\u25bf",
-    "triangleleft;": "\u25c3",
-    "trianglelefteq;": "\u22b4",
-    "triangleq;": "\u225c",
-    "triangleright;": "\u25b9",
-    "trianglerighteq;": "\u22b5",
-    "tridot;": "\u25ec",
-    "trie;": "\u225c",
-    "triminus;": "\u2a3a",
-    "triplus;": "\u2a39",
-    "trisb;": "\u29cd",
-    "tritime;": "\u2a3b",
-    "trpezium;": "\u23e2",
-    "tscr;": "\U0001d4c9",
-    "tscy;": "\u0446",
-    "tshcy;": "\u045b",
-    "tstrok;": "\u0167",
-    "twixt;": "\u226c",
-    "twoheadleftarrow;": "\u219e",
-    "twoheadrightarrow;": "\u21a0",
-    "uArr;": "\u21d1",
-    "uHar;": "\u2963",
-    "uacute": "\xfa",
-    "uacute;": "\xfa",
-    "uarr;": "\u2191",
-    "ubrcy;": "\u045e",
-    "ubreve;": "\u016d",
-    "ucirc": "\xfb",
-    "ucirc;": "\xfb",
-    "ucy;": "\u0443",
-    "udarr;": "\u21c5",
-    "udblac;": "\u0171",
-    "udhar;": "\u296e",
-    "ufisht;": "\u297e",
-    "ufr;": "\U0001d532",
-    "ugrave": "\xf9",
-    "ugrave;": "\xf9",
-    "uharl;": "\u21bf",
-    "uharr;": "\u21be",
-    "uhblk;": "\u2580",
-    "ulcorn;": "\u231c",
-    "ulcorner;": "\u231c",
-    "ulcrop;": "\u230f",
-    "ultri;": "\u25f8",
-    "umacr;": "\u016b",
-    "uml": "\xa8",
-    "uml;": "\xa8",
-    "uogon;": "\u0173",
-    "uopf;": "\U0001d566",
-    "uparrow;": "\u2191",
-    "updownarrow;": "\u2195",
-    "upharpoonleft;": "\u21bf",
-    "upharpoonright;": "\u21be",
-    "uplus;": "\u228e",
-    "upsi;": "\u03c5",
-    "upsih;": "\u03d2",
-    "upsilon;": "\u03c5",
-    "upuparrows;": "\u21c8",
-    "urcorn;": "\u231d",
-    "urcorner;": "\u231d",
-    "urcrop;": "\u230e",
-    "uring;": "\u016f",
-    "urtri;": "\u25f9",
-    "uscr;": "\U0001d4ca",
-    "utdot;": "\u22f0",
-    "utilde;": "\u0169",
-    "utri;": "\u25b5",
-    "utrif;": "\u25b4",
-    "uuarr;": "\u21c8",
-    "uuml": "\xfc",
-    "uuml;": "\xfc",
-    "uwangle;": "\u29a7",
-    "vArr;": "\u21d5",
-    "vBar;": "\u2ae8",
-    "vBarv;": "\u2ae9",
-    "vDash;": "\u22a8",
-    "vangrt;": "\u299c",
-    "varepsilon;": "\u03f5",
-    "varkappa;": "\u03f0",
-    "varnothing;": "\u2205",
-    "varphi;": "\u03d5",
-    "varpi;": "\u03d6",
-    "varpropto;": "\u221d",
-    "varr;": "\u2195",
-    "varrho;": "\u03f1",
-    "varsigma;": "\u03c2",
-    "varsubsetneq;": "\u228a\ufe00",
-    "varsubsetneqq;": "\u2acb\ufe00",
-    "varsupsetneq;": "\u228b\ufe00",
-    "varsupsetneqq;": "\u2acc\ufe00",
-    "vartheta;": "\u03d1",
-    "vartriangleleft;": "\u22b2",
-    "vartriangleright;": "\u22b3",
-    "vcy;": "\u0432",
-    "vdash;": "\u22a2",
-    "vee;": "\u2228",
-    "veebar;": "\u22bb",
-    "veeeq;": "\u225a",
-    "vellip;": "\u22ee",
-    "verbar;": "|",
-    "vert;": "|",
-    "vfr;": "\U0001d533",
-    "vltri;": "\u22b2",
-    "vnsub;": "\u2282\u20d2",
-    "vnsup;": "\u2283\u20d2",
-    "vopf;": "\U0001d567",
-    "vprop;": "\u221d",
-    "vrtri;": "\u22b3",
-    "vscr;": "\U0001d4cb",
-    "vsubnE;": "\u2acb\ufe00",
-    "vsubne;": "\u228a\ufe00",
-    "vsupnE;": "\u2acc\ufe00",
-    "vsupne;": "\u228b\ufe00",
-    "vzigzag;": "\u299a",
-    "wcirc;": "\u0175",
-    "wedbar;": "\u2a5f",
-    "wedge;": "\u2227",
-    "wedgeq;": "\u2259",
-    "weierp;": "\u2118",
-    "wfr;": "\U0001d534",
-    "wopf;": "\U0001d568",
-    "wp;": "\u2118",
-    "wr;": "\u2240",
-    "wreath;": "\u2240",
-    "wscr;": "\U0001d4cc",
-    "xcap;": "\u22c2",
-    "xcirc;": "\u25ef",
-    "xcup;": "\u22c3",
-    "xdtri;": "\u25bd",
-    "xfr;": "\U0001d535",
-    "xhArr;": "\u27fa",
-    "xharr;": "\u27f7",
-    "xi;": "\u03be",
-    "xlArr;": "\u27f8",
-    "xlarr;": "\u27f5",
-    "xmap;": "\u27fc",
-    "xnis;": "\u22fb",
-    "xodot;": "\u2a00",
-    "xopf;": "\U0001d569",
-    "xoplus;": "\u2a01",
-    "xotime;": "\u2a02",
-    "xrArr;": "\u27f9",
-    "xrarr;": "\u27f6",
-    "xscr;": "\U0001d4cd",
-    "xsqcup;": "\u2a06",
-    "xuplus;": "\u2a04",
-    "xutri;": "\u25b3",
-    "xvee;": "\u22c1",
-    "xwedge;": "\u22c0",
-    "yacute": "\xfd",
-    "yacute;": "\xfd",
-    "yacy;": "\u044f",
-    "ycirc;": "\u0177",
-    "ycy;": "\u044b",
-    "yen": "\xa5",
-    "yen;": "\xa5",
-    "yfr;": "\U0001d536",
-    "yicy;": "\u0457",
-    "yopf;": "\U0001d56a",
-    "yscr;": "\U0001d4ce",
-    "yucy;": "\u044e",
-    "yuml": "\xff",
-    "yuml;": "\xff",
-    "zacute;": "\u017a",
-    "zcaron;": "\u017e",
-    "zcy;": "\u0437",
-    "zdot;": "\u017c",
-    "zeetrf;": "\u2128",
-    "zeta;": "\u03b6",
-    "zfr;": "\U0001d537",
-    "zhcy;": "\u0436",
-    "zigrarr;": "\u21dd",
-    "zopf;": "\U0001d56b",
-    "zscr;": "\U0001d4cf",
-    "zwj;": "\u200d",
-    "zwnj;": "\u200c",
-}
-
-replacementCharacters = {
-    0x0: "\uFFFD",
-    0x0d: "\u000D",
-    0x80: "\u20AC",
-    0x81: "\u0081",
-    0x82: "\u201A",
-    0x83: "\u0192",
-    0x84: "\u201E",
-    0x85: "\u2026",
-    0x86: "\u2020",
-    0x87: "\u2021",
-    0x88: "\u02C6",
-    0x89: "\u2030",
-    0x8A: "\u0160",
-    0x8B: "\u2039",
-    0x8C: "\u0152",
-    0x8D: "\u008D",
-    0x8E: "\u017D",
-    0x8F: "\u008F",
-    0x90: "\u0090",
-    0x91: "\u2018",
-    0x92: "\u2019",
-    0x93: "\u201C",
-    0x94: "\u201D",
-    0x95: "\u2022",
-    0x96: "\u2013",
-    0x97: "\u2014",
-    0x98: "\u02DC",
-    0x99: "\u2122",
-    0x9A: "\u0161",
-    0x9B: "\u203A",
-    0x9C: "\u0153",
-    0x9D: "\u009D",
-    0x9E: "\u017E",
-    0x9F: "\u0178",
-}
-
-tokenTypes = {
-    "Doctype": 0,
-    "Characters": 1,
-    "SpaceCharacters": 2,
-    "StartTag": 3,
-    "EndTag": 4,
-    "EmptyTag": 5,
-    "Comment": 6,
-    "ParseError": 7
-}
-
-tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
-                           tokenTypes["EmptyTag"]])
-
-
-prefixes = {v: k for k, v in namespaces.items()}
-prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
-
-
-class DataLossWarning(UserWarning):
-    """Raised when the current tree is unable to represent the input data"""
-    pass
-
-
-class _ReparseException(Exception):
-    pass
diff --git a/src/pip/_vendor/html5lib/filters/__init__.py b/src/pip/_vendor/html5lib/filters/__init__.py
deleted file mode 100644
index e69de29bb..000000000
--- a/src/pip/_vendor/html5lib/filters/__init__.py
+++ /dev/null
diff --git a/src/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/src/pip/_vendor/html5lib/filters/alphabeticalattributes.py
deleted file mode 100644
index 5ba926e3b..000000000
--- a/src/pip/_vendor/html5lib/filters/alphabeticalattributes.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import base
-
-from collections import OrderedDict
-
-
-def _attr_key(attr):
-    """Return an appropriate key for an attribute for sorting
-
-    Attributes have a namespace that can be either ``None`` or a string. We
-    can't compare the two because they're different types, so we convert
-    ``None`` to an empty string first.
-
-    """
-    return (attr[0][0] or ''), attr[0][1]
-
-
-class Filter(base.Filter):
-    """Alphabetizes attributes for elements"""
-    def __iter__(self):
-        for token in base.Filter.__iter__(self):
-            if token["type"] in ("StartTag", "EmptyTag"):
-                attrs = OrderedDict()
-                for name, value in sorted(token["data"].items(),
-                                          key=_attr_key):
-                    attrs[name] = value
-                token["data"] = attrs
-            yield token
diff --git a/src/pip/_vendor/html5lib/filters/base.py b/src/pip/_vendor/html5lib/filters/base.py
deleted file mode 100644
index c7dbaed0f..000000000
--- a/src/pip/_vendor/html5lib/filters/base.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-class Filter(object):
-    def __init__(self, source):
-        self.source = source
-
-    def __iter__(self):
-        return iter(self.source)
-
-    def __getattr__(self, name):
-        return getattr(self.source, name)
diff --git a/src/pip/_vendor/html5lib/filters/inject_meta_charset.py b/src/pip/_vendor/html5lib/filters/inject_meta_charset.py
deleted file mode 100644
index aefb5c842..000000000
--- a/src/pip/_vendor/html5lib/filters/inject_meta_charset.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import base
-
-
-class Filter(base.Filter):
-    """Injects ``<meta charset=ENCODING>`` tag into head of document"""
-    def __init__(self, source, encoding):
-        """Creates a Filter
-
-        :arg source: the source token stream
-
-        :arg encoding: the encoding to set
-
-        """
-        base.Filter.__init__(self, source)
-        self.encoding = encoding
-
-    def __iter__(self):
-        state = "pre_head"
-        meta_found = (self.encoding is None)
-        pending = []
-
-        for token in base.Filter.__iter__(self):
-            type = token["type"]
-            if type == "StartTag":
-                if token["name"].lower() == "head":
-                    state = "in_head"
-
-            elif type == "EmptyTag":
-                if token["name"].lower() == "meta":
-                    # replace charset with actual encoding
-                    has_http_equiv_content_type = False
-                    for (namespace, name), value in token["data"].items():
-                        if namespace is not None:
-                            continue
-                        elif name.lower() == 'charset':
-                            token["data"][(namespace, name)] = self.encoding
-                            meta_found = True
-                            break
-                        elif name == 'http-equiv' and value.lower() == 'content-type':
-                            has_http_equiv_content_type = True
-                    else:
-                        if has_http_equiv_content_type and (None, "content") in token["data"]:
-                            token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
-                            meta_found = True
-
-                elif token["name"].lower() == "head" and not meta_found:
-                    # insert meta into empty head
-                    yield {"type": "StartTag", "name": "head",
-                           "data": token["data"]}
-                    yield {"type": "EmptyTag", "name": "meta",
-                           "data": {(None, "charset"): self.encoding}}
-                    yield {"type": "EndTag", "name": "head"}
-                    meta_found = True
-                    continue
-
-            elif type == "EndTag":
-                if token["name"].lower() == "head" and pending:
-                    # insert meta into head (if necessary) and flush pending queue
-                    yield pending.pop(0)
-                    if not meta_found:
-                        yield {"type": "EmptyTag", "name": "meta",
-                               "data": {(None, "charset"): self.encoding}}
-                    while pending:
-                        yield pending.pop(0)
-                    meta_found = True
-                    state = "post_head"
-
-            if state == "in_head":
-                pending.append(token)
-            else:
-                yield token
diff --git a/src/pip/_vendor/html5lib/filters/lint.py b/src/pip/_vendor/html5lib/filters/lint.py
deleted file mode 100644
index fcc07eec5..000000000
--- a/src/pip/_vendor/html5lib/filters/lint.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from pip._vendor.six import text_type
-
-from . import base
-from ..constants import namespaces, voidElements
-
-from ..constants import spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-
-class Filter(base.Filter):
-    """Lints the token stream for errors
-
-    If it finds any errors, it'll raise an ``AssertionError``.
-
-    """
-    def __init__(self, source, require_matching_tags=True):
-        """Creates a Filter
-
-        :arg source: the source token stream
-
-        :arg require_matching_tags: whether or not to require matching tags
-
-        """
-        super(Filter, self).__init__(source)
-        self.require_matching_tags = require_matching_tags
-
-    def __iter__(self):
-        open_elements = []
-        for token in base.Filter.__iter__(self):
-            type = token["type"]
-            if type in ("StartTag", "EmptyTag"):
-                namespace = token["namespace"]
-                name = token["name"]
-                assert namespace is None or isinstance(namespace, text_type)
-                assert namespace != ""
-                assert isinstance(name, text_type)
-                assert name != ""
-                assert isinstance(token["data"], dict)
-                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
-                    assert type == "EmptyTag"
-                else:
-                    assert type == "StartTag"
-                if type == "StartTag" and self.require_matching_tags:
-                    open_elements.append((namespace, name))
-                for (namespace, name), value in token["data"].items():
-                    assert namespace is None or isinstance(namespace, text_type)
-                    assert namespace != ""
-                    assert isinstance(name, text_type)
-                    assert name != ""
-                    assert isinstance(value, text_type)
-
-            elif type == "EndTag":
-                namespace = token["namespace"]
-                name = token["name"]
-                assert namespace is None or isinstance(namespace, text_type)
-                assert namespace != ""
-                assert isinstance(name, text_type)
-                assert name != ""
-                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
-                    assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
-                elif self.require_matching_tags:
-                    start = open_elements.pop()
-                    assert start == (namespace, name)
-
-            elif type == "Comment":
-                data = token["data"]
-                assert isinstance(data, text_type)
-
-            elif type in ("Characters", "SpaceCharacters"):
-                data = token["data"]
-                assert isinstance(data, text_type)
-                assert data != ""
-                if type == "SpaceCharacters":
-                    assert data.strip(spaceCharacters) == ""
-
-            elif type == "Doctype":
-                name = token["name"]
-                assert name is None or isinstance(name, text_type)
-                assert token["publicId"] is None or isinstance(name, text_type)
-                assert token["systemId"] is None or isinstance(name, text_type)
-
-            elif type == "Entity":
-                assert isinstance(token["name"], text_type)
-
-            elif type == "SerializerError":
-                assert isinstance(token["data"], text_type)
-
-            else:
-                assert False, "Unknown token type: %(type)s" % {"type": type}
-
-            yield token
diff --git a/src/pip/_vendor/html5lib/filters/optionaltags.py b/src/pip/_vendor/html5lib/filters/optionaltags.py
deleted file mode 100644
index 4a865012c..000000000
--- a/src/pip/_vendor/html5lib/filters/optionaltags.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from . import base
-
-
-class Filter(base.Filter):
-    """Removes optional tags from the token stream"""
-    def slider(self):
-        previous1 = previous2 = None
-        for token in self.source:
-            if previous1 is not None:
-                yield previous2, previous1, token
-            previous2 = previous1
-            previous1 = token
-        if previous1 is not None:
-            yield previous2, previous1, None
-
-    def __iter__(self):
-        for previous, token, next in self.slider():
-            type = token["type"]
-            if type == "StartTag":
-                if (token["data"] or
-                        not self.is_optional_start(token["name"], previous, next)):
-                    yield token
-            elif type == "EndTag":
-                if not self.is_optional_end(token["name"], next):
-                    yield token
-            else:
-                yield token
-
-    def is_optional_start(self, tagname, previous, next):
-        type = next and next["type"] or None
-        if tagname in 'html':
-            # An html element's start tag may be omitted if the first thing
-            # inside the html element is not a space character or a comment.
-            return type not in ("Comment", "SpaceCharacters")
-        elif tagname == 'head':
-            # A head element's start tag may be omitted if the first thing
-            # inside the head element is an element.
-            # XXX: we also omit the start tag if the head element is empty
-            if type in ("StartTag", "EmptyTag"):
-                return True
-            elif type == "EndTag":
-                return next["name"] == "head"
-        elif tagname == 'body':
-            # A body element's start tag may be omitted if the first thing
-            # inside the body element is not a space character or a comment,
-            # except if the first thing inside the body element is a script
-            # or style element and the node immediately preceding the body
-            # element is a head element whose end tag has been omitted.
-            if type in ("Comment", "SpaceCharacters"):
-                return False
-            elif type == "StartTag":
-                # XXX: we do not look at the preceding event, so we never omit
-                # the body element's start tag if it's followed by a script or
-                # a style element.
-                return next["name"] not in ('script', 'style')
-            else:
-                return True
-        elif tagname == 'colgroup':
-            # A colgroup element's start tag may be omitted if the first thing
-            # inside the colgroup element is a col element, and if the element
-            # is not immediately preceded by another colgroup element whose
-            # end tag has been omitted.
-            if type in ("StartTag", "EmptyTag"):
-                # XXX: we do not look at the preceding event, so instead we never
-                # omit the colgroup element's end tag when it is immediately
-                # followed by another colgroup element. See is_optional_end.
-                return next["name"] == "col"
-            else:
-                return False
-        elif tagname == 'tbody':
-            # A tbody element's start tag may be omitted if the first thing
-            # inside the tbody element is a tr element, and if the element is
-            # not immediately preceded by a tbody, thead, or tfoot element
-            # whose end tag has been omitted.
-            if type == "StartTag":
-                # omit the thead and tfoot elements' end tag when they are
-                # immediately followed by a tbody element. See is_optional_end.
-                if previous and previous['type'] == 'EndTag' and \
-                        previous['name'] in ('tbody', 'thead', 'tfoot'):
-                    return False
-                return next["name"] == 'tr'
-            else:
-                return False
-        return False
-
-    def is_optional_end(self, tagname, next):
-        type = next and next["type"] or None
-        if tagname in ('html', 'head', 'body'):
-            # An html element's end tag may be omitted if the html element
-            # is not immediately followed by a space character or a comment.
-            return type not in ("Comment", "SpaceCharacters")
-        elif tagname in ('li', 'optgroup', 'tr'):
-            # A li element's end tag may be omitted if the li element is
-            # immediately followed by another li element or if there is
-            # no more content in the parent element.
-            # An optgroup element's end tag may be omitted if the optgroup
-            # element is immediately followed by another optgroup element,
-            # or if there is no more content in the parent element.
-            # A tr element's end tag may be omitted if the tr element is
-            # immediately followed by another tr element, or if there is
-            # no more content in the parent element.
-            if type == "StartTag":
-                return next["name"] == tagname
-            else:
-                return type == "EndTag" or type is None
-        elif tagname in ('dt', 'dd'):
-            # A dt element's end tag may be omitted if the dt element is
-            # immediately followed by another dt element or a dd element.
-            # A dd element's end tag may be omitted if the dd element is
-            # immediately followed by another dd element or a dt element,
-            # or if there is no more content in the parent element.
-            if type == "StartTag":
-                return next["name"] in ('dt', 'dd')
-            elif tagname == 'dd':
-                return type == "EndTag" or type is None
-            else:
-                return False
-        elif tagname == 'p':
-            # A p element's end tag may be omitted if the p element is
-            # immediately followed by an address, article, aside,
-            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
-            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
-            # nav, ol, p, pre, section, table, or ul, element, or if
-            # there is no more content in the parent element.
-            if type in ("StartTag", "EmptyTag"):
-                return next["name"] in ('address', 'article', 'aside',
-                                        'blockquote', 'datagrid', 'dialog',
-                                        'dir', 'div', 'dl', 'fieldset', 'footer',
-                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
-                                        'header', 'hr', 'menu', 'nav', 'ol',
-                                        'p', 'pre', 'section', 'table', 'ul')
-            else:
-                return type == "EndTag" or type is None
-        elif tagname == 'option':
-            # An option element's end tag may be omitted if the option
-            # element is immediately followed by another option element,
-            # or if it is immediately followed by an <code>optgroup</code>
-            # element, or if there is no more content in the parent
-            # element.
-            if type == "StartTag":
-                return next["name"] in ('option', 'optgroup')
-            else:
-                return type == "EndTag" or type is None
-        elif tagname in ('rt', 'rp'):
-            # An rt element's end tag may be omitted if the rt element is
-            # immediately followed by an rt or rp element, or if there is
-            # no more content in the parent element.
-            # An rp element's end tag may be omitted if the rp element is
-            # immediately followed by an rt or rp element, or if there is
-            # no more content in the parent element.
-            if type == "StartTag":
-                return next["name"] in ('rt', 'rp')
-            else:
-                return type == "EndTag" or type is None
-        elif tagname == 'colgroup':
-            # A colgroup element's end tag may be omitted if the colgroup
-            # element is not immediately followed by a space character or
-            # a comment.
-            if type in ("Comment", "SpaceCharacters"):
-                return False
-            elif type == "StartTag":
-                # XXX: we also look for an immediately following colgroup
-                # element. See is_optional_start.
-                return next["name"] != 'colgroup'
-            else:
-                return True
-        elif tagname in ('thead', 'tbody'):
-            # A thead element's end tag may be omitted if the thead element
-            # is immediately followed by a tbody or tfoot element.
-            # A tbody element's end tag may be omitted if the tbody element
-            # is immediately followed by a tbody or tfoot element, or if
-            # there is no more content in the parent element.
-            # A tfoot element's end tag may be omitted if the tfoot element
-            # is immediately followed by a tbody element, or if there is no
-            # more content in the parent element.
-            # XXX: we never omit the end tag when the following element is
-            # a tbody. See is_optional_start.
-            if type == "StartTag":
-                return next["name"] in ['tbody', 'tfoot']
-            elif tagname == 'tbody':
-                return type == "EndTag" or type is None
-            else:
-                return False
-        elif tagname == 'tfoot':
-            # A tfoot element's end tag may be omitted if the tfoot element
-            # is immediately followed by a tbody element, or if there is no
-            # more content in the parent element.
-            # XXX: we never omit the end tag when the following element is
-            # a tbody. See is_optional_start.
-            if type == "StartTag":
-                return next["name"] == 'tbody'
-            else:
-                return type == "EndTag" or type is None
-        elif tagname in ('td', 'th'):
-            # A td element's end tag may be omitted if the td element is
-            # immediately followed by a td or th element, or if there is
-            # no more content in the parent element.
-            # A th element's end tag may be omitted if the th element is
-            # immediately followed by a td or th element, or if there is
-            # no more content in the parent element.
-            if type == "StartTag":
-                return next["name"] in ('td', 'th')
-            else:
-                return type == "EndTag" or type is None
-        return False
diff --git a/src/pip/_vendor/html5lib/filters/sanitizer.py b/src/pip/_vendor/html5lib/filters/sanitizer.py
deleted file mode 100644
index aa7431d13..000000000
--- a/src/pip/_vendor/html5lib/filters/sanitizer.py
+++ /dev/null
@@ -1,916 +0,0 @@
-"""Deprecated from html5lib 1.1.
-
-See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
-information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
-is recommended as a replacement. Please let us know in the aforementioned issue
-if Bleach is unsuitable for your needs.
-
-"""
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-import warnings
-from xml.sax.saxutils import escape, unescape
-
-from pip._vendor.six.moves import urllib_parse as urlparse
-
-from . import base
-from ..constants import namespaces, prefixes
-
-__all__ = ["Filter"]
-
-
-_deprecation_msg = (
-    "html5lib's sanitizer is deprecated; see " +
-    "https://github.com/html5lib/html5lib-python/issues/443 and please let " +
-    "us know if Bleach is unsuitable for your needs"
-)
-
-warnings.warn(_deprecation_msg, DeprecationWarning)
-
-allowed_elements = frozenset((
-    (namespaces['html'], 'a'),
-    (namespaces['html'], 'abbr'),
-    (namespaces['html'], 'acronym'),
-    (namespaces['html'], 'address'),
-    (namespaces['html'], 'area'),
-    (namespaces['html'], 'article'),
-    (namespaces['html'], 'aside'),
-    (namespaces['html'], 'audio'),
-    (namespaces['html'], 'b'),
-    (namespaces['html'], 'big'),
-    (namespaces['html'], 'blockquote'),
-    (namespaces['html'], 'br'),
-    (namespaces['html'], 'button'),
-    (namespaces['html'], 'canvas'),
-    (namespaces['html'], 'caption'),
-    (namespaces['html'], 'center'),
-    (namespaces['html'], 'cite'),
-    (namespaces['html'], 'code'),
-    (namespaces['html'], 'col'),
-    (namespaces['html'], 'colgroup'),
-    (namespaces['html'], 'command'),
-    (namespaces['html'], 'datagrid'),
-    (namespaces['html'], 'datalist'),
-    (namespaces['html'], 'dd'),
-    (namespaces['html'], 'del'),
-    (namespaces['html'], 'details'),
-    (namespaces['html'], 'dfn'),
-    (namespaces['html'], 'dialog'),
-    (namespaces['html'], 'dir'),
-    (namespaces['html'], 'div'),
-    (namespaces['html'], 'dl'),
-    (namespaces['html'], 'dt'),
-    (namespaces['html'], 'em'),
-    (namespaces['html'], 'event-source'),
-    (namespaces['html'], 'fieldset'),
-    (namespaces['html'], 'figcaption'),
-    (namespaces['html'], 'figure'),
-    (namespaces['html'], 'footer'),
-    (namespaces['html'], 'font'),
-    (namespaces['html'], 'form'),
-    (namespaces['html'], 'header'),
-    (namespaces['html'], 'h1'),
-    (namespaces['html'], 'h2'),
-    (namespaces['html'], 'h3'),
-    (namespaces['html'], 'h4'),
-    (namespaces['html'], 'h5'),
-    (namespaces['html'], 'h6'),
-    (namespaces['html'], 'hr'),
-    (namespaces['html'], 'i'),
-    (namespaces['html'], 'img'),
-    (namespaces['html'], 'input'),
-    (namespaces['html'], 'ins'),
-    (namespaces['html'], 'keygen'),
-    (namespaces['html'], 'kbd'),
-    (namespaces['html'], 'label'),
-    (namespaces['html'], 'legend'),
-    (namespaces['html'], 'li'),
-    (namespaces['html'], 'm'),
-    (namespaces['html'], 'map'),
-    (namespaces['html'], 'menu'),
-    (namespaces['html'], 'meter'),
-    (namespaces['html'], 'multicol'),
-    (namespaces['html'], 'nav'),
-    (namespaces['html'], 'nextid'),
-    (namespaces['html'], 'ol'),
-    (namespaces['html'], 'output'),
-    (namespaces['html'], 'optgroup'),
-    (namespaces['html'], 'option'),
-    (namespaces['html'], 'p'),
-    (namespaces['html'], 'pre'),
-    (namespaces['html'], 'progress'),
-    (namespaces['html'], 'q'),
-    (namespaces['html'], 's'),
-    (namespaces['html'], 'samp'),
-    (namespaces['html'], 'section'),
-    (namespaces['html'], 'select'),
-    (namespaces['html'], 'small'),
-    (namespaces['html'], 'sound'),
-    (namespaces['html'], 'source'),
-    (namespaces['html'], 'spacer'),
-    (namespaces['html'], 'span'),
-    (namespaces['html'], 'strike'),
-    (namespaces['html'], 'strong'),
-    (namespaces['html'], 'sub'),
-    (namespaces['html'], 'sup'),
-    (namespaces['html'], 'table'),
-    (namespaces['html'], 'tbody'),
-    (namespaces['html'], 'td'),
-    (namespaces['html'], 'textarea'),
-    (namespaces['html'], 'time'),
-    (namespaces['html'], 'tfoot'),
-    (namespaces['html'], 'th'),
-    (namespaces['html'], 'thead'),
-    (namespaces['html'], 'tr'),
-    (namespaces['html'], 'tt'),
-    (namespaces['html'], 'u'),
-    (namespaces['html'], 'ul'),
-    (namespaces['html'], 'var'),
-    (namespaces['html'], 'video'),
-    (namespaces['mathml'], 'maction'),
-    (namespaces['mathml'], 'math'),
-    (namespaces['mathml'], 'merror'),
-    (namespaces['mathml'], 'mfrac'),
-    (namespaces['mathml'], 'mi'),
-    (namespaces['mathml'], 'mmultiscripts'),
-    (namespaces['mathml'], 'mn'),
-    (namespaces['mathml'], 'mo'),
-    (namespaces['mathml'], 'mover'),
-    (namespaces['mathml'], 'mpadded'),
-    (namespaces['mathml'], 'mphantom'),
-    (namespaces['mathml'], 'mprescripts'),
-    (namespaces['mathml'], 'mroot'),
-    (namespaces['mathml'], 'mrow'),
-    (namespaces['mathml'], 'mspace'),
-    (namespaces['mathml'], 'msqrt'),
-    (namespaces['mathml'], 'mstyle'),
-    (namespaces['mathml'], 'msub'),
-    (namespaces['mathml'], 'msubsup'),
-    (namespaces['mathml'], 'msup'),
-    (namespaces['mathml'], 'mtable'),
-    (namespaces['mathml'], 'mtd'),
-    (namespaces['mathml'], 'mtext'),
-    (namespaces['mathml'], 'mtr'),
-    (namespaces['mathml'], 'munder'),
-    (namespaces['mathml'], 'munderover'),
-    (namespaces['mathml'], 'none'),
-    (namespaces['svg'], 'a'),
-    (namespaces['svg'], 'animate'),
-    (namespaces['svg'], 'animateColor'),
-    (namespaces['svg'], 'animateMotion'),
-    (namespaces['svg'], 'animateTransform'),
-    (namespaces['svg'], 'clipPath'),
-    (namespaces['svg'], 'circle'),
-    (namespaces['svg'], 'defs'),
-    (namespaces['svg'], 'desc'),
-    (namespaces['svg'], 'ellipse'),
-    (namespaces['svg'], 'font-face'),
-    (namespaces['svg'], 'font-face-name'),
-    (namespaces['svg'], 'font-face-src'),
-    (namespaces['svg'], 'g'),
-    (namespaces['svg'], 'glyph'),
-    (namespaces['svg'], 'hkern'),
-    (namespaces['svg'], 'linearGradient'),
-    (namespaces['svg'], 'line'),
-    (namespaces['svg'], 'marker'),
-    (namespaces['svg'], 'metadata'),
-    (namespaces['svg'], 'missing-glyph'),
-    (namespaces['svg'], 'mpath'),
-    (namespaces['svg'], 'path'),
-    (namespaces['svg'], 'polygon'),
-    (namespaces['svg'], 'polyline'),
-    (namespaces['svg'], 'radialGradient'),
-    (namespaces['svg'], 'rect'),
-    (namespaces['svg'], 'set'),
-    (namespaces['svg'], 'stop'),
-    (namespaces['svg'], 'svg'),
-    (namespaces['svg'], 'switch'),
-    (namespaces['svg'], 'text'),
-    (namespaces['svg'], 'title'),
-    (namespaces['svg'], 'tspan'),
-    (namespaces['svg'], 'use'),
-))
-
-allowed_attributes = frozenset((
-    # HTML attributes
-    (None, 'abbr'),
-    (None, 'accept'),
-    (None, 'accept-charset'),
-    (None, 'accesskey'),
-    (None, 'action'),
-    (None, 'align'),
-    (None, 'alt'),
-    (None, 'autocomplete'),
-    (None, 'autofocus'),
-    (None, 'axis'),
-    (None, 'background'),
-    (None, 'balance'),
-    (None, 'bgcolor'),
-    (None, 'bgproperties'),
-    (None, 'border'),
-    (None, 'bordercolor'),
-    (None, 'bordercolordark'),
-    (None, 'bordercolorlight'),
-    (None, 'bottompadding'),
-    (None, 'cellpadding'),
-    (None, 'cellspacing'),
-    (None, 'ch'),
-    (None, 'challenge'),
-    (None, 'char'),
-    (None, 'charoff'),
-    (None, 'choff'),
-    (None, 'charset'),
-    (None, 'checked'),
-    (None, 'cite'),
-    (None, 'class'),
-    (None, 'clear'),
-    (None, 'color'),
-    (None, 'cols'),
-    (None, 'colspan'),
-    (None, 'compact'),
-    (None, 'contenteditable'),
-    (None, 'controls'),
-    (None, 'coords'),
-    (None, 'data'),
-    (None, 'datafld'),
-    (None, 'datapagesize'),
-    (None, 'datasrc'),
-    (None, 'datetime'),
-    (None, 'default'),
-    (None, 'delay'),
-    (None, 'dir'),
-    (None, 'disabled'),
-    (None, 'draggable'),
-    (None, 'dynsrc'),
-    (None, 'enctype'),
-    (None, 'end'),
-    (None, 'face'),
-    (None, 'for'),
-    (None, 'form'),
-    (None, 'frame'),
-    (None, 'galleryimg'),
-    (None, 'gutter'),
-    (None, 'headers'),
-    (None, 'height'),
-    (None, 'hidefocus'),
-    (None, 'hidden'),
-    (None, 'high'),
-    (None, 'href'),
-    (None, 'hreflang'),
-    (None, 'hspace'),
-    (None, 'icon'),
-    (None, 'id'),
-    (None, 'inputmode'),
-    (None, 'ismap'),
-    (None, 'keytype'),
-    (None, 'label'),
-    (None, 'leftspacing'),
-    (None, 'lang'),
-    (None, 'list'),
-    (None, 'longdesc'),
-    (None, 'loop'),
-    (None, 'loopcount'),
-    (None, 'loopend'),
-    (None, 'loopstart'),
-    (None, 'low'),
-    (None, 'lowsrc'),
-    (None, 'max'),
-    (None, 'maxlength'),
-    (None, 'media'),
-    (None, 'method'),
-    (None, 'min'),
-    (None, 'multiple'),
-    (None, 'name'),
-    (None, 'nohref'),
-    (None, 'noshade'),
-    (None, 'nowrap'),
-    (None, 'open'),
-    (None, 'optimum'),
-    (None, 'pattern'),
-    (None, 'ping'),
-    (None, 'point-size'),
-    (None, 'poster'),
-    (None, 'pqg'),
-    (None, 'preload'),
-    (None, 'prompt'),
-    (None, 'radiogroup'),
-    (None, 'readonly'),
-    (None, 'rel'),
-    (None, 'repeat-max'),
-    (None, 'repeat-min'),
-    (None, 'replace'),
-    (None, 'required'),
-    (None, 'rev'),
-    (None, 'rightspacing'),
-    (None, 'rows'),
-    (None, 'rowspan'),
-    (None, 'rules'),
-    (None, 'scope'),
-    (None, 'selected'),
-    (None, 'shape'),
-    (None, 'size'),
-    (None, 'span'),
-    (None, 'src'),
-    (None, 'start'),
-    (None, 'step'),
-    (None, 'style'),
-    (None, 'summary'),
-    (None, 'suppress'),
-    (None, 'tabindex'),
-    (None, 'target'),
-    (None, 'template'),
-    (None, 'title'),
-    (None, 'toppadding'),
-    (None, 'type'),
-    (None, 'unselectable'),
-    (None, 'usemap'),
-    (None, 'urn'),
-    (None, 'valign'),
-    (None, 'value'),
-    (None, 'variable'),
-    (None, 'volume'),
-    (None, 'vspace'),
-    (None, 'vrml'),
-    (None, 'width'),
-    (None, 'wrap'),
-    (namespaces['xml'], 'lang'),
-    # MathML attributes
-    (None, 'actiontype'),
-    (None, 'align'),
-    (None, 'columnalign'),
-    (None, 'columnalign'),
-    (None, 'columnalign'),
-    (None, 'columnlines'),
-    (None, 'columnspacing'),
-    (None, 'columnspan'),
-    (None, 'depth'),
-    (None, 'display'),
-    (None, 'displaystyle'),
-    (None, 'equalcolumns'),
-    (None, 'equalrows'),
-    (None, 'fence'),
-    (None, 'fontstyle'),
-    (None, 'fontweight'),
-    (None, 'frame'),
-    (None, 'height'),
-    (None, 'linethickness'),
-    (None, 'lspace'),
-    (None, 'mathbackground'),
-    (None, 'mathcolor'),
-    (None, 'mathvariant'),
-    (None, 'mathvariant'),
-    (None, 'maxsize'),
-    (None, 'minsize'),
-    (None, 'other'),
-    (None, 'rowalign'),
-    (None, 'rowalign'),
-    (None, 'rowalign'),
-    (None, 'rowlines'),
-    (None, 'rowspacing'),
-    (None, 'rowspan'),
-    (None, 'rspace'),
-    (None, 'scriptlevel'),
-    (None, 'selection'),
-    (None, 'separator'),
-    (None, 'stretchy'),
-    (None, 'width'),
-    (None, 'width'),
-    (namespaces['xlink'], 'href'),
-    (namespaces['xlink'], 'show'),
-    (namespaces['xlink'], 'type'),
-    # SVG attributes
-    (None, 'accent-height'),
-    (None, 'accumulate'),
-    (None, 'additive'),
-    (None, 'alphabetic'),
-    (None, 'arabic-form'),
-    (None, 'ascent'),
-    (None, 'attributeName'),
-    (None, 'attributeType'),
-    (None, 'baseProfile'),
-    (None, 'bbox'),
-    (None, 'begin'),
-    (None, 'by'),
-    (None, 'calcMode'),
-    (None, 'cap-height'),
-    (None, 'class'),
-    (None, 'clip-path'),
-    (None, 'color'),
-    (None, 'color-rendering'),
-    (None, 'content'),
-    (None, 'cx'),
-    (None, 'cy'),
-    (None, 'd'),
-    (None, 'dx'),
-    (None, 'dy'),
-    (None, 'descent'),
-    (None, 'display'),
-    (None, 'dur'),
-    (None, 'end'),
-    (None, 'fill'),
-    (None, 'fill-opacity'),
-    (None, 'fill-rule'),
-    (None, 'font-family'),
-    (None, 'font-size'),
-    (None, 'font-stretch'),
-    (None, 'font-style'),
-    (None, 'font-variant'),
-    (None, 'font-weight'),
-    (None, 'from'),
-    (None, 'fx'),
-    (None, 'fy'),
-    (None, 'g1'),
-    (None, 'g2'),
-    (None, 'glyph-name'),
-    (None, 'gradientUnits'),
-    (None, 'hanging'),
-    (None, 'height'),
-    (None, 'horiz-adv-x'),
-    (None, 'horiz-origin-x'),
-    (None, 'id'),
-    (None, 'ideographic'),
-    (None, 'k'),
-    (None, 'keyPoints'),
-    (None, 'keySplines'),
-    (None, 'keyTimes'),
-    (None, 'lang'),
-    (None, 'marker-end'),
-    (None, 'marker-mid'),
-    (None, 'marker-start'),
-    (None, 'markerHeight'),
-    (None, 'markerUnits'),
-    (None, 'markerWidth'),
-    (None, 'mathematical'),
-    (None, 'max'),
-    (None, 'min'),
-    (None, 'name'),
-    (None, 'offset'),
-    (None, 'opacity'),
-    (None, 'orient'),
-    (None, 'origin'),
-    (None, 'overline-position'),
-    (None, 'overline-thickness'),
-    (None, 'panose-1'),
-    (None, 'path'),
-    (None, 'pathLength'),
-    (None, 'points'),
-    (None, 'preserveAspectRatio'),
-    (None, 'r'),
-    (None, 'refX'),
-    (None, 'refY'),
-    (None, 'repeatCount'),
-    (None, 'repeatDur'),
-    (None, 'requiredExtensions'),
-    (None, 'requiredFeatures'),
-    (None, 'restart'),
-    (None, 'rotate'),
-    (None, 'rx'),
-    (None, 'ry'),
-    (None, 'slope'),
-    (None, 'stemh'),
-    (None, 'stemv'),
-    (None, 'stop-color'),
-    (None, 'stop-opacity'),
-    (None, 'strikethrough-position'),
-    (None, 'strikethrough-thickness'),
-    (None, 'stroke'),
-    (None, 'stroke-dasharray'),
-    (None, 'stroke-dashoffset'),
-    (None, 'stroke-linecap'),
-    (None, 'stroke-linejoin'),
-    (None, 'stroke-miterlimit'),
-    (None, 'stroke-opacity'),
-    (None, 'stroke-width'),
-    (None, 'systemLanguage'),
-    (None, 'target'),
-    (None, 'text-anchor'),
-    (None, 'to'),
-    (None, 'transform'),
-    (None, 'type'),
-    (None, 'u1'),
-    (None, 'u2'),
-    (None, 'underline-position'),
-    (None, 'underline-thickness'),
-    (None, 'unicode'),
-    (None, 'unicode-range'),
-    (None, 'units-per-em'),
-    (None, 'values'),
-    (None, 'version'),
-    (None, 'viewBox'),
-    (None, 'visibility'),
-    (None, 'width'),
-    (None, 'widths'),
-    (None, 'x'),
-    (None, 'x-height'),
-    (None, 'x1'),
-    (None, 'x2'),
-    (namespaces['xlink'], 'actuate'),
-    (namespaces['xlink'], 'arcrole'),
-    (namespaces['xlink'], 'href'),
-    (namespaces['xlink'], 'role'),
-    (namespaces['xlink'], 'show'),
-    (namespaces['xlink'], 'title'),
-    (namespaces['xlink'], 'type'),
-    (namespaces['xml'], 'base'),
-    (namespaces['xml'], 'lang'),
-    (namespaces['xml'], 'space'),
-    (None, 'y'),
-    (None, 'y1'),
-    (None, 'y2'),
-    (None, 'zoomAndPan'),
-))
-
-attr_val_is_uri = frozenset((
-    (None, 'href'),
-    (None, 'src'),
-    (None, 'cite'),
-    (None, 'action'),
-    (None, 'longdesc'),
-    (None, 'poster'),
-    (None, 'background'),
-    (None, 'datasrc'),
-    (None, 'dynsrc'),
-    (None, 'lowsrc'),
-    (None, 'ping'),
-    (namespaces['xlink'], 'href'),
-    (namespaces['xml'], 'base'),
-))
-
-svg_attr_val_allows_ref = frozenset((
-    (None, 'clip-path'),
-    (None, 'color-profile'),
-    (None, 'cursor'),
-    (None, 'fill'),
-    (None, 'filter'),
-    (None, 'marker'),
-    (None, 'marker-start'),
-    (None, 'marker-mid'),
-    (None, 'marker-end'),
-    (None, 'mask'),
-    (None, 'stroke'),
-))
-
-svg_allow_local_href = frozenset((
-    (None, 'altGlyph'),
-    (None, 'animate'),
-    (None, 'animateColor'),
-    (None, 'animateMotion'),
-    (None, 'animateTransform'),
-    (None, 'cursor'),
-    (None, 'feImage'),
-    (None, 'filter'),
-    (None, 'linearGradient'),
-    (None, 'pattern'),
-    (None, 'radialGradient'),
-    (None, 'textpath'),
-    (None, 'tref'),
-    (None, 'set'),
-    (None, 'use')
-))
-
-allowed_css_properties = frozenset((
-    'azimuth',
-    'background-color',
-    'border-bottom-color',
-    'border-collapse',
-    'border-color',
-    'border-left-color',
-    'border-right-color',
-    'border-top-color',
-    'clear',
-    'color',
-    'cursor',
-    'direction',
-    'display',
-    'elevation',
-    'float',
-    'font',
-    'font-family',
-    'font-size',
-    'font-style',
-    'font-variant',
-    'font-weight',
-    'height',
-    'letter-spacing',
-    'line-height',
-    'overflow',
-    'pause',
-    'pause-after',
-    'pause-before',
-    'pitch',
-    'pitch-range',
-    'richness',
-    'speak',
-    'speak-header',
-    'speak-numeral',
-    'speak-punctuation',
-    'speech-rate',
-    'stress',
-    'text-align',
-    'text-decoration',
-    'text-indent',
-    'unicode-bidi',
-    'vertical-align',
-    'voice-family',
-    'volume',
-    'white-space',
-    'width',
-))
-
-allowed_css_keywords = frozenset((
-    'auto',
-    'aqua',
-    'black',
-    'block',
-    'blue',
-    'bold',
-    'both',
-    'bottom',
-    'brown',
-    'center',
-    'collapse',
-    'dashed',
-    'dotted',
-    'fuchsia',
-    'gray',
-    'green',
-    '!important',
-    'italic',
-    'left',
-    'lime',
-    'maroon',
-    'medium',
-    'none',
-    'navy',
-    'normal',
-    'nowrap',
-    'olive',
-    'pointer',
-    'purple',
-    'red',
-    'right',
-    'solid',
-    'silver',
-    'teal',
-    'top',
-    'transparent',
-    'underline',
-    'white',
-    'yellow',
-))
-
-allowed_svg_properties = frozenset((
-    'fill',
-    'fill-opacity',
-    'fill-rule',
-    'stroke',
-    'stroke-width',
-    'stroke-linecap',
-    'stroke-linejoin',
-    'stroke-opacity',
-))
-
-allowed_protocols = frozenset((
-    'ed2k',
-    'ftp',
-    'http',
-    'https',
-    'irc',
-    'mailto',
-    'news',
-    'gopher',
-    'nntp',
-    'telnet',
-    'webcal',
-    'xmpp',
-    'callto',
-    'feed',
-    'urn',
-    'aim',
-    'rsync',
-    'tag',
-    'ssh',
-    'sftp',
-    'rtsp',
-    'afs',
-    'data',
-))
-
-allowed_content_types = frozenset((
-    'image/png',
-    'image/jpeg',
-    'image/gif',
-    'image/webp',
-    'image/bmp',
-    'text/plain',
-))
-
-
-data_content_type = re.compile(r'''
-                                ^
-                                # Match a content type <application>/<type>
-                                (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
-                                # Match any character set and encoding
-                                (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
-                                  |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
-                                # Assume the rest is data
-                                ,.*
-                                $
-                                ''',
-                               re.VERBOSE)
-
-
-class Filter(base.Filter):
-    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
-    def __init__(self,
-                 source,
-                 allowed_elements=allowed_elements,
-                 allowed_attributes=allowed_attributes,
-                 allowed_css_properties=allowed_css_properties,
-                 allowed_css_keywords=allowed_css_keywords,
-                 allowed_svg_properties=allowed_svg_properties,
-                 allowed_protocols=allowed_protocols,
-                 allowed_content_types=allowed_content_types,
-                 attr_val_is_uri=attr_val_is_uri,
-                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
-                 svg_allow_local_href=svg_allow_local_href):
-        """Creates a Filter
-
-        :arg allowed_elements: set of elements to allow--everything else will
-            be escaped
-
-        :arg allowed_attributes: set of attributes to allow in
-            elements--everything else will be stripped
-
-        :arg allowed_css_properties: set of CSS properties to allow--everything
-            else will be stripped
-
-        :arg allowed_css_keywords: set of CSS keywords to allow--everything
-            else will be stripped
-
-        :arg allowed_svg_properties: set of SVG properties to allow--everything
-            else will be removed
-
-        :arg allowed_protocols: set of allowed protocols for URIs
-
-        :arg allowed_content_types: set of allowed content types for ``data`` URIs.
-
-        :arg attr_val_is_uri: set of attributes that have URI values--values
-            that have a scheme not listed in ``allowed_protocols`` are removed
-
-        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
-            references
-
-        :arg svg_allow_local_href: set of SVG elements that can have local
-            hrefs--these are removed
-
-        """
-        super(Filter, self).__init__(source)
-
-        warnings.warn(_deprecation_msg, DeprecationWarning)
-
-        self.allowed_elements = allowed_elements
-        self.allowed_attributes = allowed_attributes
-        self.allowed_css_properties = allowed_css_properties
-        self.allowed_css_keywords = allowed_css_keywords
-        self.allowed_svg_properties = allowed_svg_properties
-        self.allowed_protocols = allowed_protocols
-        self.allowed_content_types = allowed_content_types
-        self.attr_val_is_uri = attr_val_is_uri
-        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
-        self.svg_allow_local_href = svg_allow_local_href
-
-    def __iter__(self):
-        for token in base.Filter.__iter__(self):
-            token = self.sanitize_token(token)
-            if token:
-                yield token
-
-    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
-    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
-    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
-    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
-    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
-    # allowed.
-    #
-    #   sanitize_html('<script> do_nasty_stuff() </script>')
-    #    => &lt;script> do_nasty_stuff() &lt;/script>
-    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
-    #    => <a>Click here for $100</a>
-    def sanitize_token(self, token):
-
-        # accommodate filters which use token_type differently
-        token_type = token["type"]
-        if token_type in ("StartTag", "EndTag", "EmptyTag"):
-            name = token["name"]
-            namespace = token["namespace"]
-            if ((namespace, name) in self.allowed_elements or
-                (namespace is None and
-                 (namespaces["html"], name) in self.allowed_elements)):
-                return self.allowed_token(token)
-            else:
-                return self.disallowed_token(token)
-        elif token_type == "Comment":
-            pass
-        else:
-            return token
-
-    def allowed_token(self, token):
-        if "data" in token:
-            attrs = token["data"]
-            attr_names = set(attrs.keys())
-
-            # Remove forbidden attributes
-            for to_remove in (attr_names - self.allowed_attributes):
-                del token["data"][to_remove]
-                attr_names.remove(to_remove)
-
-            # Remove attributes with disallowed URL values
-            for attr in (attr_names & self.attr_val_is_uri):
-                assert attr in attrs
-                # I don't have a clue where this regexp comes from or why it matches those
-                # characters, nor why we call unescape. I just know it's always been here.
-                # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
-                # this will do is remove *more* than it otherwise would.
-                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
-                                       unescape(attrs[attr])).lower()
-                # remove replacement characters from unescaped characters
-                val_unescaped = val_unescaped.replace("\ufffd", "")
-                try:
-                    uri = urlparse.urlparse(val_unescaped)
-                except ValueError:
-                    uri = None
-                    del attrs[attr]
-                if uri and uri.scheme:
-                    if uri.scheme not in self.allowed_protocols:
-                        del attrs[attr]
-                    if uri.scheme == 'data':
-                        m = data_content_type.match(uri.path)
-                        if not m:
-                            del attrs[attr]
-                        elif m.group('content_type') not in self.allowed_content_types:
-                            del attrs[attr]
-
-            for attr in self.svg_attr_val_allows_ref:
-                if attr in attrs:
-                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
-                                         ' ',
-                                         unescape(attrs[attr]))
-            if (token["name"] in self.svg_allow_local_href and
-                (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
-                                                                     attrs[(namespaces['xlink'], 'href')])):
-                del attrs[(namespaces['xlink'], 'href')]
-            if (None, 'style') in attrs:
-                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
-            token["data"] = attrs
-        return token
-
-    def disallowed_token(self, token):
-        token_type = token["type"]
-        if token_type == "EndTag":
-            token["data"] = "</%s>" % token["name"]
-        elif token["data"]:
-            assert token_type in ("StartTag", "EmptyTag")
-            attrs = []
-            for (ns, name), v in token["data"].items():
-                attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
-            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
-        else:
-            token["data"] = "<%s>" % token["name"]
-        if token.get("selfClosing"):
-            token["data"] = token["data"][:-1] + "/>"
-
-        token["type"] = "Characters"
-
-        del token["name"]
-        return token
-
-    def sanitize_css(self, style):
-        # disallow urls
-        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
-
-        # gauntlet
-        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
-            return ''
-        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
-            return ''
-
-        clean = []
-        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
-            if not value:
-                continue
-            if prop.lower() in self.allowed_css_properties:
-                clean.append(prop + ': ' + value + ';')
-            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
-                                                'padding']:
-                for keyword in value.split():
-                    if keyword not in self.allowed_css_keywords and \
-                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
-                        break
-                else:
-                    clean.append(prop + ': ' + value + ';')
-            elif prop.lower() in self.allowed_svg_properties:
-                clean.append(prop + ': ' + value + ';')
-
-        return ' '.join(clean)
diff --git a/src/pip/_vendor/html5lib/filters/whitespace.py b/src/pip/_vendor/html5lib/filters/whitespace.py
deleted file mode 100644
index 0d12584b4..000000000
--- a/src/pip/_vendor/html5lib/filters/whitespace.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import re
-
-from . import base
-from ..constants import rcdataElements, spaceCharacters
-spaceCharacters = "".join(spaceCharacters)
-
-SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
-
-
-class Filter(base.Filter):
-    """Collapses whitespace except in pre, textarea, and script elements"""
-    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
-
-    def __iter__(self):
-        preserve = 0
-        for token in base.Filter.__iter__(self):
-            type = token["type"]
-            if type == "StartTag" \
-                    and (preserve or token["name"] in self.spacePreserveElements):
-                preserve += 1
-
-            elif type == "EndTag" and preserve:
-                preserve -= 1
-
-            elif not preserve and type == "SpaceCharacters" and token["data"]:
-                # Test on token["data"] above to not introduce spaces where there were not
-                token["data"] = " "
-
-            elif not preserve and type == "Characters":
-                token["data"] = collapse_spaces(token["data"])
-
-            yield token
-
-
-def collapse_spaces(text):
-    return SPACES_REGEX.sub(' ', text)
diff --git a/src/pip/_vendor/html5lib/html5parser.py b/src/pip/_vendor/html5lib/html5parser.py
deleted file mode 100644
index d06784f3d..000000000
--- a/src/pip/_vendor/html5lib/html5parser.py
+++ /dev/null
@@ -1,2795 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import with_metaclass, viewkeys
-
-import types
-
-from . import _inputstream
-from . import _tokenizer
-
-from . import treebuilders
-from .treebuilders.base import Marker
-
-from . import _utils
-from .constants import (
-    spaceCharacters, asciiUpper2Lower,
-    specialElements, headingElements, cdataElements, rcdataElements,
-    tokenTypes, tagTokenTypes,
-    namespaces,
-    htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
-    adjustForeignAttributes as adjustForeignAttributesMap,
-    adjustMathMLAttributes, adjustSVGAttributes,
-    E,
-    _ReparseException
-)
-
-
-def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
-    """Parse an HTML document as a string or file-like object into a tree
-
-    :arg doc: the document to parse as a string or file-like object
-
-    :arg treebuilder: the treebuilder to use when parsing
-
-    :arg namespaceHTMLElements: whether or not to namespace HTML elements
-
-    :returns: parsed tree
-
-    Example:
-
-    >>> from html5lib.html5parser import parse
-    >>> parse('<html><body><p>This is a doc</p></body></html>')
-    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
-
-    """
-    tb = treebuilders.getTreeBuilder(treebuilder)
-    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
-    return p.parse(doc, **kwargs)
-
-
-def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
-    """Parse an HTML fragment as a string or file-like object into a tree
-
-    :arg doc: the fragment to parse as a string or file-like object
-
-    :arg container: the container context to parse the fragment in
-
-    :arg treebuilder: the treebuilder to use when parsing
-
-    :arg namespaceHTMLElements: whether or not to namespace HTML elements
-
-    :returns: parsed tree
-
-    Example:
-
-    >>> from html5lib.html5libparser import parseFragment
-    >>> parseFragment('<b>this is a fragment</b>')
-    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
-
-    """
-    tb = treebuilders.getTreeBuilder(treebuilder)
-    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
-    return p.parseFragment(doc, container=container, **kwargs)
-
-
-def method_decorator_metaclass(function):
-    class Decorated(type):
-        def __new__(meta, classname, bases, classDict):
-            for attributeName, attribute in classDict.items():
-                if isinstance(attribute, types.FunctionType):
-                    attribute = function(attribute)
-
-                classDict[attributeName] = attribute
-            return type.__new__(meta, classname, bases, classDict)
-    return Decorated
-
-
-class HTMLParser(object):
-    """HTML parser
-
-    Generates a tree structure from a stream of (possibly malformed) HTML.
-
-    """
-
-    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
-        """
-        :arg tree: a treebuilder class controlling the type of tree that will be
-            returned. Built in treebuilders can be accessed through
-            html5lib.treebuilders.getTreeBuilder(treeType)
-
-        :arg strict: raise an exception when a parse error is encountered
-
-        :arg namespaceHTMLElements: whether or not to namespace HTML elements
-
-        :arg debug: whether or not to enable debug mode which logs things
-
-        Example:
-
-        >>> from html5lib.html5parser import HTMLParser
-        >>> parser = HTMLParser()                     # generates parser with etree builder
-        >>> parser = HTMLParser('lxml', strict=True)  # generates parser with lxml builder which is strict
-
-        """
-
-        # Raise an exception on the first error encountered
-        self.strict = strict
-
-        if tree is None:
-            tree = treebuilders.getTreeBuilder("etree")
-        self.tree = tree(namespaceHTMLElements)
-        self.errors = []
-
-        self.phases = {name: cls(self, self.tree) for name, cls in
-                       getPhases(debug).items()}
-
-    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
-
-        self.innerHTMLMode = innerHTML
-        self.container = container
-        self.scripting = scripting
-        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
-        self.reset()
-
-        try:
-            self.mainLoop()
-        except _ReparseException:
-            self.reset()
-            self.mainLoop()
-
-    def reset(self):
-        self.tree.reset()
-        self.firstStartTag = False
-        self.errors = []
-        self.log = []  # only used with debug mode
-        # "quirks" / "limited quirks" / "no quirks"
-        self.compatMode = "no quirks"
-
-        if self.innerHTMLMode:
-            self.innerHTML = self.container.lower()
-
-            if self.innerHTML in cdataElements:
-                self.tokenizer.state = self.tokenizer.rcdataState
-            elif self.innerHTML in rcdataElements:
-                self.tokenizer.state = self.tokenizer.rawtextState
-            elif self.innerHTML == 'plaintext':
-                self.tokenizer.state = self.tokenizer.plaintextState
-            else:
-                # state already is data state
-                # self.tokenizer.state = self.tokenizer.dataState
-                pass
-            self.phase = self.phases["beforeHtml"]
-            self.phase.insertHtmlElement()
-            self.resetInsertionMode()
-        else:
-            self.innerHTML = False  # pylint:disable=redefined-variable-type
-            self.phase = self.phases["initial"]
-
-        self.lastPhase = None
-
-        self.beforeRCDataPhase = None
-
-        self.framesetOK = True
-
-    @property
-    def documentEncoding(self):
-        """Name of the character encoding that was used to decode the input stream, or
-        :obj:`None` if that is not determined yet
-
-        """
-        if not hasattr(self, 'tokenizer'):
-            return None
-        return self.tokenizer.stream.charEncoding[0].name
-
-    def isHTMLIntegrationPoint(self, element):
-        if (element.name == "annotation-xml" and
-                element.namespace == namespaces["mathml"]):
-            return ("encoding" in element.attributes and
-                    element.attributes["encoding"].translate(
-                        asciiUpper2Lower) in
-                    ("text/html", "application/xhtml+xml"))
-        else:
-            return (element.namespace, element.name) in htmlIntegrationPointElements
-
-    def isMathMLTextIntegrationPoint(self, element):
-        return (element.namespace, element.name) in mathmlTextIntegrationPointElements
-
-    def mainLoop(self):
-        CharactersToken = tokenTypes["Characters"]
-        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
-        StartTagToken = tokenTypes["StartTag"]
-        EndTagToken = tokenTypes["EndTag"]
-        CommentToken = tokenTypes["Comment"]
-        DoctypeToken = tokenTypes["Doctype"]
-        ParseErrorToken = tokenTypes["ParseError"]
-
-        for token in self.tokenizer:
-            prev_token = None
-            new_token = token
-            while new_token is not None:
-                prev_token = new_token
-                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
-                currentNodeNamespace = currentNode.namespace if currentNode else None
-                currentNodeName = currentNode.name if currentNode else None
-
-                type = new_token["type"]
-
-                if type == ParseErrorToken:
-                    self.parseError(new_token["data"], new_token.get("datavars", {}))
-                    new_token = None
-                else:
-                    if (len(self.tree.openElements) == 0 or
-                        currentNodeNamespace == self.tree.defaultNamespace or
-                        (self.isMathMLTextIntegrationPoint(currentNode) and
-                         ((type == StartTagToken and
-                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
-                          type in (CharactersToken, SpaceCharactersToken))) or
-                        (currentNodeNamespace == namespaces["mathml"] and
-                         currentNodeName == "annotation-xml" and
-                         type == StartTagToken and
-                         token["name"] == "svg") or
-                        (self.isHTMLIntegrationPoint(currentNode) and
-                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
-                        phase = self.phase
-                    else:
-                        phase = self.phases["inForeignContent"]
-
-                    if type == CharactersToken:
-                        new_token = phase.processCharacters(new_token)
-                    elif type == SpaceCharactersToken:
-                        new_token = phase.processSpaceCharacters(new_token)
-                    elif type == StartTagToken:
-                        new_token = phase.processStartTag(new_token)
-                    elif type == EndTagToken:
-                        new_token = phase.processEndTag(new_token)
-                    elif type == CommentToken:
-                        new_token = phase.processComment(new_token)
-                    elif type == DoctypeToken:
-                        new_token = phase.processDoctype(new_token)
-
-            if (type == StartTagToken and prev_token["selfClosing"] and
-                    not prev_token["selfClosingAcknowledged"]):
-                self.parseError("non-void-element-with-trailing-solidus",
-                                {"name": prev_token["name"]})
-
-        # When the loop finishes it's EOF
-        reprocess = True
-        phases = []
-        while reprocess:
-            phases.append(self.phase)
-            reprocess = self.phase.processEOF()
-            if reprocess:
-                assert self.phase not in phases
-
-    def parse(self, stream, *args, **kwargs):
-        """Parse a HTML document into a well-formed tree
-
-        :arg stream: a file-like object or string containing the HTML to be parsed
-
-            The optional encoding parameter must be a string that indicates
-            the encoding.  If specified, that encoding will be used,
-            regardless of any BOM or later declaration (such as in a meta
-            element).
-
-        :arg scripting: treat noscript elements as if JavaScript was turned on
-
-        :returns: parsed tree
-
-        Example:
-
-        >>> from html5lib.html5parser import HTMLParser
-        >>> parser = HTMLParser()
-        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
-        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
-
-        """
-        self._parse(stream, False, None, *args, **kwargs)
-        return self.tree.getDocument()
-
-    def parseFragment(self, stream, *args, **kwargs):
-        """Parse a HTML fragment into a well-formed tree fragment
-
-        :arg container: name of the element we're setting the innerHTML
-            property if set to None, default to 'div'
-
-        :arg stream: a file-like object or string containing the HTML to be parsed
-
-            The optional encoding parameter must be a string that indicates
-            the encoding.  If specified, that encoding will be used,
-            regardless of any BOM or later declaration (such as in a meta
-            element)
-
-        :arg scripting: treat noscript elements as if JavaScript was turned on
-
-        :returns: parsed tree
-
-        Example:
-
-        >>> from html5lib.html5libparser import HTMLParser
-        >>> parser = HTMLParser()
-        >>> parser.parseFragment('<b>this is a fragment</b>')
-        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
-
-        """
-        self._parse(stream, True, *args, **kwargs)
-        return self.tree.getFragment()
-
-    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
-        # XXX The idea is to make errorcode mandatory.
-        if datavars is None:
-            datavars = {}
-        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
-        if self.strict:
-            raise ParseError(E[errorcode] % datavars)
-
-    def adjustMathMLAttributes(self, token):
-        adjust_attributes(token, adjustMathMLAttributes)
-
-    def adjustSVGAttributes(self, token):
-        adjust_attributes(token, adjustSVGAttributes)
-
-    def adjustForeignAttributes(self, token):
-        adjust_attributes(token, adjustForeignAttributesMap)
-
-    def reparseTokenNormal(self, token):
-        # pylint:disable=unused-argument
-        self.parser.phase()
-
-    def resetInsertionMode(self):
-        # The name of this method is mostly historical. (It's also used in the
-        # specification.)
-        last = False
-        newModes = {
-            "select": "inSelect",
-            "td": "inCell",
-            "th": "inCell",
-            "tr": "inRow",
-            "tbody": "inTableBody",
-            "thead": "inTableBody",
-            "tfoot": "inTableBody",
-            "caption": "inCaption",
-            "colgroup": "inColumnGroup",
-            "table": "inTable",
-            "head": "inBody",
-            "body": "inBody",
-            "frameset": "inFrameset",
-            "html": "beforeHead"
-        }
-        for node in self.tree.openElements[::-1]:
-            nodeName = node.name
-            new_phase = None
-            if node == self.tree.openElements[0]:
-                assert self.innerHTML
-                last = True
-                nodeName = self.innerHTML
-            # Check for conditions that should only happen in the innerHTML
-            # case
-            if nodeName in ("select", "colgroup", "head", "html"):
-                assert self.innerHTML
-
-            if not last and node.namespace != self.tree.defaultNamespace:
-                continue
-
-            if nodeName in newModes:
-                new_phase = self.phases[newModes[nodeName]]
-                break
-            elif last:
-                new_phase = self.phases["inBody"]
-                break
-
-        self.phase = new_phase
-
-    def parseRCDataRawtext(self, token, contentType):
-        # Generic RCDATA/RAWTEXT Parsing algorithm
-        assert contentType in ("RAWTEXT", "RCDATA")
-
-        self.tree.insertElement(token)
-
-        if contentType == "RAWTEXT":
-            self.tokenizer.state = self.tokenizer.rawtextState
-        else:
-            self.tokenizer.state = self.tokenizer.rcdataState
-
-        self.originalPhase = self.phase
-
-        self.phase = self.phases["text"]
-
-
-@_utils.memoize
-def getPhases(debug):
-    def log(function):
-        """Logger that records which phase processes each token"""
-        type_names = {value: key for key, value in tokenTypes.items()}
-
-        def wrapped(self, *args, **kwargs):
-            if function.__name__.startswith("process") and len(args) > 0:
-                token = args[0]
-                info = {"type": type_names[token['type']]}
-                if token['type'] in tagTokenTypes:
-                    info["name"] = token['name']
-
-                self.parser.log.append((self.parser.tokenizer.state.__name__,
-                                        self.parser.phase.__class__.__name__,
-                                        self.__class__.__name__,
-                                        function.__name__,
-                                        info))
-                return function(self, *args, **kwargs)
-            else:
-                return function(self, *args, **kwargs)
-        return wrapped
-
-    def getMetaclass(use_metaclass, metaclass_func):
-        if use_metaclass:
-            return method_decorator_metaclass(metaclass_func)
-        else:
-            return type
-
-    # pylint:disable=unused-argument
-    class Phase(with_metaclass(getMetaclass(debug, log))):
-        """Base class for helper object that implements each phase of processing
-        """
-        __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
-
-        def __init__(self, parser, tree):
-            self.parser = parser
-            self.tree = tree
-            self.__startTagCache = {}
-            self.__endTagCache = {}
-
-        def processEOF(self):
-            raise NotImplementedError
-
-        def processComment(self, token):
-            # For most phases the following is correct. Where it's not it will be
-            # overridden.
-            self.tree.insertComment(token, self.tree.openElements[-1])
-
-        def processDoctype(self, token):
-            self.parser.parseError("unexpected-doctype")
-
-        def processCharacters(self, token):
-            self.tree.insertText(token["data"])
-
-        def processSpaceCharacters(self, token):
-            self.tree.insertText(token["data"])
-
-        def processStartTag(self, token):
-            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
-            # requires a circular reference to the Phase, and this ends up with a significant
-            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
-            name = token["name"]
-            # In Py2, using `in` is quicker in general than try/except KeyError
-            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
-            if name in self.__startTagCache:
-                func = self.__startTagCache[name]
-            else:
-                func = self.__startTagCache[name] = self.startTagHandler[name]
-                # bound the cache size in case we get loads of unknown tags
-                while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
-                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
-                    self.__startTagCache.pop(next(iter(self.__startTagCache)))
-            return func(token)
-
-        def startTagHtml(self, token):
-            if not self.parser.firstStartTag and token["name"] == "html":
-                self.parser.parseError("non-html-root")
-            # XXX Need a check here to see if the first start tag token emitted is
-            # this token... If it's not, invoke self.parser.parseError().
-            for attr, value in token["data"].items():
-                if attr not in self.tree.openElements[0].attributes:
-                    self.tree.openElements[0].attributes[attr] = value
-            self.parser.firstStartTag = False
-
-        def processEndTag(self, token):
-            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
-            # requires a circular reference to the Phase, and this ends up with a significant
-            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
-            name = token["name"]
-            # In Py2, using `in` is quicker in general than try/except KeyError
-            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
-            if name in self.__endTagCache:
-                func = self.__endTagCache[name]
-            else:
-                func = self.__endTagCache[name] = self.endTagHandler[name]
-                # bound the cache size in case we get loads of unknown tags
-                while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
-                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
-                    self.__endTagCache.pop(next(iter(self.__endTagCache)))
-            return func(token)
-
-    class InitialPhase(Phase):
-        __slots__ = tuple()
-
-        def processSpaceCharacters(self, token):
-            pass
-
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
-
-        def processDoctype(self, token):
-            name = token["name"]
-            publicId = token["publicId"]
-            systemId = token["systemId"]
-            correct = token["correct"]
-
-            if (name != "html" or publicId is not None or
-                    systemId is not None and systemId != "about:legacy-compat"):
-                self.parser.parseError("unknown-doctype")
-
-            if publicId is None:
-                publicId = ""
-
-            self.tree.insertDoctype(token)
-
-            if publicId != "":
-                publicId = publicId.translate(asciiUpper2Lower)
-
-            if (not correct or token["name"] != "html" or
-                    publicId.startswith(
-                        ("+//silmaril//dtd html pro v0r11 19970101//",
-                         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
-                         "-//as//dtd html 3.0 aswedit + extensions//",
-                         "-//ietf//dtd html 2.0 level 1//",
-                         "-//ietf//dtd html 2.0 level 2//",
-                         "-//ietf//dtd html 2.0 strict level 1//",
-                         "-//ietf//dtd html 2.0 strict level 2//",
-                         "-//ietf//dtd html 2.0 strict//",
-                         "-//ietf//dtd html 2.0//",
-                         "-//ietf//dtd html 2.1e//",
-                         "-//ietf//dtd html 3.0//",
-                         "-//ietf//dtd html 3.2 final//",
-                         "-//ietf//dtd html 3.2//",
-                         "-//ietf//dtd html 3//",
-                         "-//ietf//dtd html level 0//",
-                         "-//ietf//dtd html level 1//",
-                         "-//ietf//dtd html level 2//",
-                         "-//ietf//dtd html level 3//",
-                         "-//ietf//dtd html strict level 0//",
-                         "-//ietf//dtd html strict level 1//",
-                         "-//ietf//dtd html strict level 2//",
-                         "-//ietf//dtd html strict level 3//",
-                         "-//ietf//dtd html strict//",
-                         "-//ietf//dtd html//",
-                         "-//metrius//dtd metrius presentational//",
-                         "-//microsoft//dtd internet explorer 2.0 html strict//",
-                         "-//microsoft//dtd internet explorer 2.0 html//",
-                         "-//microsoft//dtd internet explorer 2.0 tables//",
-                         "-//microsoft//dtd internet explorer 3.0 html strict//",
-                         "-//microsoft//dtd internet explorer 3.0 html//",
-                         "-//microsoft//dtd internet explorer 3.0 tables//",
-                         "-//netscape comm. corp.//dtd html//",
-                         "-//netscape comm. corp.//dtd strict html//",
-                         "-//o'reilly and associates//dtd html 2.0//",
-                         "-//o'reilly and associates//dtd html extended 1.0//",
-                         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
-                         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
-                         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
-                         "-//spyglass//dtd html 2.0 extended//",
-                         "-//sq//dtd html 2.0 hotmetal + extensions//",
-                         "-//sun microsystems corp.//dtd hotjava html//",
-                         "-//sun microsystems corp.//dtd hotjava strict html//",
-                         "-//w3c//dtd html 3 1995-03-24//",
-                         "-//w3c//dtd html 3.2 draft//",
-                         "-//w3c//dtd html 3.2 final//",
-                         "-//w3c//dtd html 3.2//",
-                         "-//w3c//dtd html 3.2s draft//",
-                         "-//w3c//dtd html 4.0 frameset//",
-                         "-//w3c//dtd html 4.0 transitional//",
-                         "-//w3c//dtd html experimental 19960712//",
-                         "-//w3c//dtd html experimental 970421//",
-                         "-//w3c//dtd w3 html//",
-                         "-//w3o//dtd w3 html 3.0//",
-                         "-//webtechs//dtd mozilla html 2.0//",
-                         "-//webtechs//dtd mozilla html//")) or
-                    publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
-                                 "-/w3c/dtd html 4.0 transitional/en",
-                                 "html") or
-                    publicId.startswith(
-                        ("-//w3c//dtd html 4.01 frameset//",
-                         "-//w3c//dtd html 4.01 transitional//")) and
-                    systemId is None or
-                    systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
-                self.parser.compatMode = "quirks"
-            elif (publicId.startswith(
-                    ("-//w3c//dtd xhtml 1.0 frameset//",
-                     "-//w3c//dtd xhtml 1.0 transitional//")) or
-                  publicId.startswith(
-                      ("-//w3c//dtd html 4.01 frameset//",
-                       "-//w3c//dtd html 4.01 transitional//")) and
-                  systemId is not None):
-                self.parser.compatMode = "limited quirks"
-
-            self.parser.phase = self.parser.phases["beforeHtml"]
-
-        def anythingElse(self):
-            self.parser.compatMode = "quirks"
-            self.parser.phase = self.parser.phases["beforeHtml"]
-
-        def processCharacters(self, token):
-            self.parser.parseError("expected-doctype-but-got-chars")
-            self.anythingElse()
-            return token
-
-        def processStartTag(self, token):
-            self.parser.parseError("expected-doctype-but-got-start-tag",
-                                   {"name": token["name"]})
-            self.anythingElse()
-            return token
-
-        def processEndTag(self, token):
-            self.parser.parseError("expected-doctype-but-got-end-tag",
-                                   {"name": token["name"]})
-            self.anythingElse()
-            return token
-
-        def processEOF(self):
-            self.parser.parseError("expected-doctype-but-got-eof")
-            self.anythingElse()
-            return True
-
-    class BeforeHtmlPhase(Phase):
-        __slots__ = tuple()
-
-        # helper methods
-        def insertHtmlElement(self):
-            self.tree.insertRoot(impliedTagToken("html", "StartTag"))
-            self.parser.phase = self.parser.phases["beforeHead"]
-
-        # other
-        def processEOF(self):
-            self.insertHtmlElement()
-            return True
-
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
-
-        def processSpaceCharacters(self, token):
-            pass
-
-        def processCharacters(self, token):
-            self.insertHtmlElement()
-            return token
-
-        def processStartTag(self, token):
-            if token["name"] == "html":
-                self.parser.firstStartTag = True
-            self.insertHtmlElement()
-            return token
-
-        def processEndTag(self, token):
-            if token["name"] not in ("head", "body", "html", "br"):
-                self.parser.parseError("unexpected-end-tag-before-html",
-                                       {"name": token["name"]})
-            else:
-                self.insertHtmlElement()
-                return token
-
-    class BeforeHeadPhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return True
-
-        def processSpaceCharacters(self, token):
-            pass
-
-        def processCharacters(self, token):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return token
-
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagHead(self, token):
-            self.tree.insertElement(token)
-            self.tree.headPointer = self.tree.openElements[-1]
-            self.parser.phase = self.parser.phases["inHead"]
-
-        def startTagOther(self, token):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return token
-
-        def endTagImplyHead(self, token):
-            self.startTagHead(impliedTagToken("head", "StartTag"))
-            return token
-
-        def endTagOther(self, token):
-            self.parser.parseError("end-tag-after-implied-root",
-                                   {"name": token["name"]})
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("head", startTagHead)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            (("head", "body", "html", "br"), endTagImplyHead)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InHeadPhase(Phase):
-        __slots__ = tuple()
-
-        # the real thing
-        def processEOF(self):
-            self.anythingElse()
-            return True
-
-        def processCharacters(self, token):
-            self.anythingElse()
-            return token
-
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagHead(self, token):
-            self.parser.parseError("two-heads-are-not-better-than-one")
-
-        def startTagBaseLinkCommand(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-
-        def startTagMeta(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-
-            attributes = token["data"]
-            if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
-                if "charset" in attributes:
-                    self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
-                elif ("content" in attributes and
-                      "http-equiv" in attributes and
-                      attributes["http-equiv"].lower() == "content-type"):
-                    # Encoding it as UTF-8 here is a hack, as really we should pass
-                    # the abstract Unicode string, and just use the
-                    # ContentAttrParser on that, but using UTF-8 allows all chars
-                    # to be encoded and as a ASCII-superset works.
-                    data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
-                    parser = _inputstream.ContentAttrParser(data)
-                    codec = parser.parse()
-                    self.parser.tokenizer.stream.changeEncoding(codec)
-
-        def startTagTitle(self, token):
-            self.parser.parseRCDataRawtext(token, "RCDATA")
-
-        def startTagNoFramesStyle(self, token):
-            # Need to decide whether to implement the scripting-disabled case
-            self.parser.parseRCDataRawtext(token, "RAWTEXT")
-
-        def startTagNoscript(self, token):
-            if self.parser.scripting:
-                self.parser.parseRCDataRawtext(token, "RAWTEXT")
-            else:
-                self.tree.insertElement(token)
-                self.parser.phase = self.parser.phases["inHeadNoscript"]
-
-        def startTagScript(self, token):
-            self.tree.insertElement(token)
-            self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
-            self.parser.originalPhase = self.parser.phase
-            self.parser.phase = self.parser.phases["text"]
-
-        def startTagOther(self, token):
-            self.anythingElse()
-            return token
-
-        def endTagHead(self, token):
-            node = self.parser.tree.openElements.pop()
-            assert node.name == "head", "Expected head got %s" % node.name
-            self.parser.phase = self.parser.phases["afterHead"]
-
-        def endTagHtmlBodyBr(self, token):
-            self.anythingElse()
-            return token
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def anythingElse(self):
-            self.endTagHead(impliedTagToken("head"))
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("title", startTagTitle),
-            (("noframes", "style"), startTagNoFramesStyle),
-            ("noscript", startTagNoscript),
-            ("script", startTagScript),
-            (("base", "basefont", "bgsound", "command", "link"),
-             startTagBaseLinkCommand),
-            ("meta", startTagMeta),
-            ("head", startTagHead)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("head", endTagHead),
-            (("br", "html", "body"), endTagHtmlBodyBr)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InHeadNoscriptPhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            self.parser.parseError("eof-in-head-noscript")
-            self.anythingElse()
-            return True
-
-        def processComment(self, token):
-            return self.parser.phases["inHead"].processComment(token)
-
-        def processCharacters(self, token):
-            self.parser.parseError("char-in-head-noscript")
-            self.anythingElse()
-            return token
-
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inHead"].processSpaceCharacters(token)
-
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagBaseLinkCommand(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
-
-        def startTagHeadNoscript(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
-
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
-            self.anythingElse()
-            return token
-
-        def endTagNoscript(self, token):
-            node = self.parser.tree.openElements.pop()
-            assert node.name == "noscript", "Expected noscript got %s" % node.name
-            self.parser.phase = self.parser.phases["inHead"]
-
-        def endTagBr(self, token):
-            self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
-            self.anythingElse()
-            return token
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def anythingElse(self):
-            # Caller must raise parse error first!
-            self.endTagNoscript(impliedTagToken("noscript"))
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
-            (("head", "noscript"), startTagHeadNoscript),
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("noscript", endTagNoscript),
-            ("br", endTagBr),
-        ])
-        endTagHandler.default = endTagOther
-
-    class AfterHeadPhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            self.anythingElse()
-            return True
-
-        def processCharacters(self, token):
-            self.anythingElse()
-            return token
-
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagBody(self, token):
-            self.parser.framesetOK = False
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inBody"]
-
-        def startTagFrameset(self, token):
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inFrameset"]
-
-        def startTagFromHead(self, token):
-            self.parser.parseError("unexpected-start-tag-out-of-my-head",
-                                   {"name": token["name"]})
-            self.tree.openElements.append(self.tree.headPointer)
-            self.parser.phases["inHead"].processStartTag(token)
-            for node in self.tree.openElements[::-1]:
-                if node.name == "head":
-                    self.tree.openElements.remove(node)
-                    break
-
-        def startTagHead(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
-
-        def startTagOther(self, token):
-            self.anythingElse()
-            return token
-
-        def endTagHtmlBodyBr(self, token):
-            self.anythingElse()
-            return token
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def anythingElse(self):
-            self.tree.insertElement(impliedTagToken("body", "StartTag"))
-            self.parser.phase = self.parser.phases["inBody"]
-            self.parser.framesetOK = True
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("body", startTagBody),
-            ("frameset", startTagFrameset),
-            (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
-              "style", "title"),
-             startTagFromHead),
-            ("head", startTagHead)
-        ])
-        startTagHandler.default = startTagOther
-        endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
-                                                  endTagHtmlBodyBr)])
-        endTagHandler.default = endTagOther
-
-    class InBodyPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
-        # the really-really-really-very crazy mode
-        __slots__ = ("processSpaceCharacters",)
-
-        def __init__(self, *args, **kwargs):
-            super(InBodyPhase, self).__init__(*args, **kwargs)
-            # Set this to the default handler
-            self.processSpaceCharacters = self.processSpaceCharactersNonPre
-
-        def isMatchingFormattingElement(self, node1, node2):
-            return (node1.name == node2.name and
-                    node1.namespace == node2.namespace and
-                    node1.attributes == node2.attributes)
-
-        # helper
-        def addFormattingElement(self, token):
-            self.tree.insertElement(token)
-            element = self.tree.openElements[-1]
-
-            matchingElements = []
-            for node in self.tree.activeFormattingElements[::-1]:
-                if node is Marker:
-                    break
-                elif self.isMatchingFormattingElement(node, element):
-                    matchingElements.append(node)
-
-            assert len(matchingElements) <= 3
-            if len(matchingElements) == 3:
-                self.tree.activeFormattingElements.remove(matchingElements[-1])
-            self.tree.activeFormattingElements.append(element)
-
-        # the real deal
-        def processEOF(self):
-            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
-                                          "tfoot", "th", "thead", "tr", "body",
-                                          "html"))
-            for node in self.tree.openElements[::-1]:
-                if node.name not in allowed_elements:
-                    self.parser.parseError("expected-closing-tag-but-got-eof")
-                    break
-            # Stop parsing
-
-        def processSpaceCharactersDropNewline(self, token):
-            # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
-            # want to drop leading newlines
-            data = token["data"]
-            self.processSpaceCharacters = self.processSpaceCharactersNonPre
-            if (data.startswith("\n") and
-                self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
-                    not self.tree.openElements[-1].hasContent()):
-                data = data[1:]
-            if data:
-                self.tree.reconstructActiveFormattingElements()
-                self.tree.insertText(data)
-
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                # The tokenizer should always emit null on its own
-                return
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertText(token["data"])
-            # This must be bad for performance
-            if (self.parser.framesetOK and
-                any([char not in spaceCharacters
-                     for char in token["data"]])):
-                self.parser.framesetOK = False
-
-        def processSpaceCharactersNonPre(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertText(token["data"])
-
-        def startTagProcessInHead(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
-
-        def startTagBody(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": "body"})
-            if (len(self.tree.openElements) == 1 or
-                    self.tree.openElements[1].name != "body"):
-                assert self.parser.innerHTML
-            else:
-                self.parser.framesetOK = False
-                for attr, value in token["data"].items():
-                    if attr not in self.tree.openElements[1].attributes:
-                        self.tree.openElements[1].attributes[attr] = value
-
-        def startTagFrameset(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
-            if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"):
-                assert self.parser.innerHTML
-            elif not self.parser.framesetOK:
-                pass
-            else:
-                if self.tree.openElements[1].parent:
-                    self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
-                while self.tree.openElements[-1].name != "html":
-                    self.tree.openElements.pop()
-                self.tree.insertElement(token)
-                self.parser.phase = self.parser.phases["inFrameset"]
-
-        def startTagCloseP(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.insertElement(token)
-
-        def startTagPreListing(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.insertElement(token)
-            self.parser.framesetOK = False
-            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
-
-        def startTagForm(self, token):
-            if self.tree.formPointer:
-                self.parser.parseError("unexpected-start-tag", {"name": "form"})
-            else:
-                if self.tree.elementInScope("p", variant="button"):
-                    self.endTagP(impliedTagToken("p"))
-                self.tree.insertElement(token)
-                self.tree.formPointer = self.tree.openElements[-1]
-
-        def startTagListItem(self, token):
-            self.parser.framesetOK = False
-
-            stopNamesMap = {"li": ["li"],
-                            "dt": ["dt", "dd"],
-                            "dd": ["dt", "dd"]}
-            stopNames = stopNamesMap[token["name"]]
-            for node in reversed(self.tree.openElements):
-                if node.name in stopNames:
-                    self.parser.phase.processEndTag(
-                        impliedTagToken(node.name, "EndTag"))
-                    break
-                if (node.nameTuple in specialElements and
-                        node.name not in ("address", "div", "p")):
-                    break
-
-            if self.tree.elementInScope("p", variant="button"):
-                self.parser.phase.processEndTag(
-                    impliedTagToken("p", "EndTag"))
-
-            self.tree.insertElement(token)
-
-        def startTagPlaintext(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.insertElement(token)
-            self.parser.tokenizer.state = self.parser.tokenizer.plaintextState
-
-        def startTagHeading(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            if self.tree.openElements[-1].name in headingElements:
-                self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
-                self.tree.openElements.pop()
-            self.tree.insertElement(token)
-
-        def startTagA(self, token):
-            afeAElement = self.tree.elementInActiveFormattingElements("a")
-            if afeAElement:
-                self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                       {"startName": "a", "endName": "a"})
-                self.endTagFormatting(impliedTagToken("a"))
-                if afeAElement in self.tree.openElements:
-                    self.tree.openElements.remove(afeAElement)
-                if afeAElement in self.tree.activeFormattingElements:
-                    self.tree.activeFormattingElements.remove(afeAElement)
-            self.tree.reconstructActiveFormattingElements()
-            self.addFormattingElement(token)
-
-        def startTagFormatting(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.addFormattingElement(token)
-
-        def startTagNobr(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            if self.tree.elementInScope("nobr"):
-                self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                       {"startName": "nobr", "endName": "nobr"})
-                self.processEndTag(impliedTagToken("nobr"))
-                # XXX Need tests that trigger the following
-                self.tree.reconstructActiveFormattingElements()
-            self.addFormattingElement(token)
-
-        def startTagButton(self, token):
-            if self.tree.elementInScope("button"):
-                self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                       {"startName": "button", "endName": "button"})
-                self.processEndTag(impliedTagToken("button"))
-                return token
-            else:
-                self.tree.reconstructActiveFormattingElements()
-                self.tree.insertElement(token)
-                self.parser.framesetOK = False
-
-        def startTagAppletMarqueeObject(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-            self.tree.activeFormattingElements.append(Marker)
-            self.parser.framesetOK = False
-
-        def startTagXmp(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.framesetOK = False
-            self.parser.parseRCDataRawtext(token, "RAWTEXT")
-
-        def startTagTable(self, token):
-            if self.parser.compatMode != "quirks":
-                if self.tree.elementInScope("p", variant="button"):
-                    self.processEndTag(impliedTagToken("p"))
-            self.tree.insertElement(token)
-            self.parser.framesetOK = False
-            self.parser.phase = self.parser.phases["inTable"]
-
-        def startTagVoidFormatting(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-            self.parser.framesetOK = False
-
-        def startTagInput(self, token):
-            framesetOK = self.parser.framesetOK
-            self.startTagVoidFormatting(token)
-            if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
-                # input type=hidden doesn't change framesetOK
-                self.parser.framesetOK = framesetOK
-
-        def startTagParamSource(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-
-        def startTagHr(self, token):
-            if self.tree.elementInScope("p", variant="button"):
-                self.endTagP(impliedTagToken("p"))
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-            self.parser.framesetOK = False
-
-        def startTagImage(self, token):
-            # No really...
-            self.parser.parseError("unexpected-start-tag-treated-as",
-                                   {"originalName": "image", "newName": "img"})
-            self.processStartTag(impliedTagToken("img", "StartTag",
-                                                 attributes=token["data"],
-                                                 selfClosing=token["selfClosing"]))
-
-        def startTagIsIndex(self, token):
-            self.parser.parseError("deprecated-tag", {"name": "isindex"})
-            if self.tree.formPointer:
-                return
-            form_attrs = {}
-            if "action" in token["data"]:
-                form_attrs["action"] = token["data"]["action"]
-            self.processStartTag(impliedTagToken("form", "StartTag",
-                                                 attributes=form_attrs))
-            self.processStartTag(impliedTagToken("hr", "StartTag"))
-            self.processStartTag(impliedTagToken("label", "StartTag"))
-            # XXX Localization ...
-            if "prompt" in token["data"]:
-                prompt = token["data"]["prompt"]
-            else:
-                prompt = "This is a searchable index. Enter search keywords: "
-            self.processCharacters(
-                {"type": tokenTypes["Characters"], "data": prompt})
-            attributes = token["data"].copy()
-            if "action" in attributes:
-                del attributes["action"]
-            if "prompt" in attributes:
-                del attributes["prompt"]
-            attributes["name"] = "isindex"
-            self.processStartTag(impliedTagToken("input", "StartTag",
-                                                 attributes=attributes,
-                                                 selfClosing=token["selfClosing"]))
-            self.processEndTag(impliedTagToken("label"))
-            self.processStartTag(impliedTagToken("hr", "StartTag"))
-            self.processEndTag(impliedTagToken("form"))
-
-        def startTagTextarea(self, token):
-            self.tree.insertElement(token)
-            self.parser.tokenizer.state = self.parser.tokenizer.rcdataState
-            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
-            self.parser.framesetOK = False
-
-        def startTagIFrame(self, token):
-            self.parser.framesetOK = False
-            self.startTagRawtext(token)
-
-        def startTagNoscript(self, token):
-            if self.parser.scripting:
-                self.startTagRawtext(token)
-            else:
-                self.startTagOther(token)
-
-        def startTagRawtext(self, token):
-            """iframe, noembed noframes, noscript(if scripting enabled)"""
-            self.parser.parseRCDataRawtext(token, "RAWTEXT")
-
-        def startTagOpt(self, token):
-            if self.tree.openElements[-1].name == "option":
-                self.parser.phase.processEndTag(impliedTagToken("option"))
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.tree.insertElement(token)
-
-        def startTagSelect(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-            self.parser.framesetOK = False
-            if self.parser.phase in (self.parser.phases["inTable"],
-                                     self.parser.phases["inCaption"],
-                                     self.parser.phases["inColumnGroup"],
-                                     self.parser.phases["inTableBody"],
-                                     self.parser.phases["inRow"],
-                                     self.parser.phases["inCell"]):
-                self.parser.phase = self.parser.phases["inSelectInTable"]
-            else:
-                self.parser.phase = self.parser.phases["inSelect"]
-
-        def startTagRpRt(self, token):
-            if self.tree.elementInScope("ruby"):
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1].name != "ruby":
-                    self.parser.parseError()
-            self.tree.insertElement(token)
-
-        def startTagMath(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.adjustMathMLAttributes(token)
-            self.parser.adjustForeignAttributes(token)
-            token["namespace"] = namespaces["mathml"]
-            self.tree.insertElement(token)
-            # Need to get the parse error right for the case where the token
-            # has a namespace not equal to the xmlns attribute
-            if token["selfClosing"]:
-                self.tree.openElements.pop()
-                token["selfClosingAcknowledged"] = True
-
-        def startTagSvg(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.parser.adjustSVGAttributes(token)
-            self.parser.adjustForeignAttributes(token)
-            token["namespace"] = namespaces["svg"]
-            self.tree.insertElement(token)
-            # Need to get the parse error right for the case where the token
-            # has a namespace not equal to the xmlns attribute
-            if token["selfClosing"]:
-                self.tree.openElements.pop()
-                token["selfClosingAcknowledged"] = True
-
-        def startTagMisplaced(self, token):
-            """ Elements that should be children of other elements that have a
-            different insertion mode; here they are ignored
-            "caption", "col", "colgroup", "frame", "frameset", "head",
-            "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
-            "tr", "noscript"
-            """
-            self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})
-
-        def startTagOther(self, token):
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(token)
-
-        def endTagP(self, token):
-            if not self.tree.elementInScope("p", variant="button"):
-                self.startTagCloseP(impliedTagToken("p", "StartTag"))
-                self.parser.parseError("unexpected-end-tag", {"name": "p"})
-                self.endTagP(impliedTagToken("p", "EndTag"))
-            else:
-                self.tree.generateImpliedEndTags("p")
-                if self.tree.openElements[-1].name != "p":
-                    self.parser.parseError("unexpected-end-tag", {"name": "p"})
-                node = self.tree.openElements.pop()
-                while node.name != "p":
-                    node = self.tree.openElements.pop()
-
-        def endTagBody(self, token):
-            if not self.tree.elementInScope("body"):
-                self.parser.parseError()
-                return
-            elif self.tree.openElements[-1].name != "body":
-                for node in self.tree.openElements[2:]:
-                    if node.name not in frozenset(("dd", "dt", "li", "optgroup",
-                                                   "option", "p", "rp", "rt",
-                                                   "tbody", "td", "tfoot",
-                                                   "th", "thead", "tr", "body",
-                                                   "html")):
-                        # Not sure this is the correct name for the parse error
-                        self.parser.parseError(
-                            "expected-one-end-tag-but-got-another",
-                            {"gotName": "body", "expectedName": node.name})
-                        break
-            self.parser.phase = self.parser.phases["afterBody"]
-
-        def endTagHtml(self, token):
-            # We repeat the test for the body end tag token being ignored here
-            if self.tree.elementInScope("body"):
-                self.endTagBody(impliedTagToken("body"))
-                return token
-
-        def endTagBlock(self, token):
-            # Put us back in the right whitespace handling mode
-            if token["name"] == "pre":
-                self.processSpaceCharacters = self.processSpaceCharactersNonPre
-            inScope = self.tree.elementInScope(token["name"])
-            if inScope:
-                self.tree.generateImpliedEndTags()
-            if self.tree.openElements[-1].name != token["name"]:
-                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
-            if inScope:
-                node = self.tree.openElements.pop()
-                while node.name != token["name"]:
-                    node = self.tree.openElements.pop()
-
-        def endTagForm(self, token):
-            node = self.tree.formPointer
-            self.tree.formPointer = None
-            if node is None or not self.tree.elementInScope(node):
-                self.parser.parseError("unexpected-end-tag",
-                                       {"name": "form"})
-            else:
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1] != node:
-                    self.parser.parseError("end-tag-too-early-ignored",
-                                           {"name": "form"})
-                self.tree.openElements.remove(node)
-
-        def endTagListItem(self, token):
-            if token["name"] == "li":
-                variant = "list"
-            else:
-                variant = None
-            if not self.tree.elementInScope(token["name"], variant=variant):
-                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-            else:
-                self.tree.generateImpliedEndTags(exclude=token["name"])
-                if self.tree.openElements[-1].name != token["name"]:
-                    self.parser.parseError(
-                        "end-tag-too-early",
-                        {"name": token["name"]})
-                node = self.tree.openElements.pop()
-                while node.name != token["name"]:
-                    node = self.tree.openElements.pop()
-
-        def endTagHeading(self, token):
-            for item in headingElements:
-                if self.tree.elementInScope(item):
-                    self.tree.generateImpliedEndTags()
-                    break
-            if self.tree.openElements[-1].name != token["name"]:
-                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
-
-            for item in headingElements:
-                if self.tree.elementInScope(item):
-                    item = self.tree.openElements.pop()
-                    while item.name not in headingElements:
-                        item = self.tree.openElements.pop()
-                    break
-
-        def endTagFormatting(self, token):
-            """The much-feared adoption agency algorithm"""
-            # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
-            # XXX Better parseError messages appreciated.
-
-            # Step 1
-            outerLoopCounter = 0
-
-            # Step 2
-            while outerLoopCounter < 8:
-
-                # Step 3
-                outerLoopCounter += 1
-
-                # Step 4:
-
-                # Let the formatting element be the last element in
-                # the list of active formatting elements that:
-                # - is between the end of the list and the last scope
-                # marker in the list, if any, or the start of the list
-                # otherwise, and
-                # - has the same tag name as the token.
-                formattingElement = self.tree.elementInActiveFormattingElements(
-                    token["name"])
-                if (not formattingElement or
-                    (formattingElement in self.tree.openElements and
-                     not self.tree.elementInScope(formattingElement.name))):
-                    # If there is no such node, then abort these steps
-                    # and instead act as described in the "any other
-                    # end tag" entry below.
-                    self.endTagOther(token)
-                    return
-
-                # Otherwise, if there is such a node, but that node is
-                # not in the stack of open elements, then this is a
-                # parse error; remove the element from the list, and
-                # abort these steps.
-                elif formattingElement not in self.tree.openElements:
-                    self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
-                    self.tree.activeFormattingElements.remove(formattingElement)
-                    return
-
-                # Otherwise, if there is such a node, and that node is
-                # also in the stack of open elements, but the element
-                # is not in scope, then this is a parse error; ignore
-                # the token, and abort these steps.
-                elif not self.tree.elementInScope(formattingElement.name):
-                    self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
-                    return
-
-                # Otherwise, there is a formatting element and that
-                # element is in the stack and is in scope. If the
-                # element is not the current node, this is a parse
-                # error. In any case, proceed with the algorithm as
-                # written in the following steps.
-                else:
-                    if formattingElement != self.tree.openElements[-1]:
-                        self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})
-
-                # Step 5:
-
-                # Let the furthest block be the topmost node in the
-                # stack of open elements that is lower in the stack
-                # than the formatting element, and is an element in
-                # the special category. There might not be one.
-                afeIndex = self.tree.openElements.index(formattingElement)
-                furthestBlock = None
-                for element in self.tree.openElements[afeIndex:]:
-                    if element.nameTuple in specialElements:
-                        furthestBlock = element
-                        break
-
-                # Step 6:
-
-                # If there is no furthest block, then the UA must
-                # first pop all the nodes from the bottom of the stack
-                # of open elements, from the current node up to and
-                # including the formatting element, then remove the
-                # formatting element from the list of active
-                # formatting elements, and finally abort these steps.
-                if furthestBlock is None:
-                    element = self.tree.openElements.pop()
-                    while element != formattingElement:
-                        element = self.tree.openElements.pop()
-                    self.tree.activeFormattingElements.remove(element)
-                    return
-
-                # Step 7
-                commonAncestor = self.tree.openElements[afeIndex - 1]
-
-                # Step 8:
-                # The bookmark is supposed to help us identify where to reinsert
-                # nodes in step 15. We have to ensure that we reinsert nodes after
-                # the node before the active formatting element. Note the bookmark
-                # can move in step 9.7
-                bookmark = self.tree.activeFormattingElements.index(formattingElement)
-
-                # Step 9
-                lastNode = node = furthestBlock
-                innerLoopCounter = 0
-
-                index = self.tree.openElements.index(node)
-                while innerLoopCounter < 3:
-                    innerLoopCounter += 1
-                    # Node is element before node in open elements
-                    index -= 1
-                    node = self.tree.openElements[index]
-                    if node not in self.tree.activeFormattingElements:
-                        self.tree.openElements.remove(node)
-                        continue
-                    # Step 9.6
-                    if node == formattingElement:
-                        break
-                    # Step 9.7
-                    if lastNode == furthestBlock:
-                        bookmark = self.tree.activeFormattingElements.index(node) + 1
-                    # Step 9.8
-                    clone = node.cloneNode()
-                    # Replace node with clone
-                    self.tree.activeFormattingElements[
-                        self.tree.activeFormattingElements.index(node)] = clone
-                    self.tree.openElements[
-                        self.tree.openElements.index(node)] = clone
-                    node = clone
-                    # Step 9.9
-                    # Remove lastNode from its parents, if any
-                    if lastNode.parent:
-                        lastNode.parent.removeChild(lastNode)
-                    node.appendChild(lastNode)
-                    # Step 9.10
-                    lastNode = node
-
-                # Step 10
-                # Foster parent lastNode if commonAncestor is a
-                # table, tbody, tfoot, thead, or tr we need to foster
-                # parent the lastNode
-                if lastNode.parent:
-                    lastNode.parent.removeChild(lastNode)
-
-                if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
-                    parent, insertBefore = self.tree.getTableMisnestedNodePosition()
-                    parent.insertBefore(lastNode, insertBefore)
-                else:
-                    commonAncestor.appendChild(lastNode)
-
-                # Step 11
-                clone = formattingElement.cloneNode()
-
-                # Step 12
-                furthestBlock.reparentChildren(clone)
-
-                # Step 13
-                furthestBlock.appendChild(clone)
-
-                # Step 14
-                self.tree.activeFormattingElements.remove(formattingElement)
-                self.tree.activeFormattingElements.insert(bookmark, clone)
-
-                # Step 15
-                self.tree.openElements.remove(formattingElement)
-                self.tree.openElements.insert(
-                    self.tree.openElements.index(furthestBlock) + 1, clone)
-
-        def endTagAppletMarqueeObject(self, token):
-            if self.tree.elementInScope(token["name"]):
-                self.tree.generateImpliedEndTags()
-            if self.tree.openElements[-1].name != token["name"]:
-                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
-
-            if self.tree.elementInScope(token["name"]):
-                element = self.tree.openElements.pop()
-                while element.name != token["name"]:
-                    element = self.tree.openElements.pop()
-                self.tree.clearActiveFormattingElements()
-
-        def endTagBr(self, token):
-            self.parser.parseError("unexpected-end-tag-treated-as",
-                                   {"originalName": "br", "newName": "br element"})
-            self.tree.reconstructActiveFormattingElements()
-            self.tree.insertElement(impliedTagToken("br", "StartTag"))
-            self.tree.openElements.pop()
-
-        def endTagOther(self, token):
-            for node in self.tree.openElements[::-1]:
-                if node.name == token["name"]:
-                    self.tree.generateImpliedEndTags(exclude=token["name"])
-                    if self.tree.openElements[-1].name != token["name"]:
-                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-                    while self.tree.openElements.pop() != node:
-                        pass
-                    break
-                else:
-                    if node.nameTuple in specialElements:
-                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-                        break
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("base", "basefont", "bgsound", "command", "link", "meta",
-              "script", "style", "title"),
-             startTagProcessInHead),
-            ("body", startTagBody),
-            ("frameset", startTagFrameset),
-            (("address", "article", "aside", "blockquote", "center", "details",
-              "dir", "div", "dl", "fieldset", "figcaption", "figure",
-              "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
-              "section", "summary", "ul"),
-             startTagCloseP),
-            (headingElements, startTagHeading),
-            (("pre", "listing"), startTagPreListing),
-            ("form", startTagForm),
-            (("li", "dd", "dt"), startTagListItem),
-            ("plaintext", startTagPlaintext),
-            ("a", startTagA),
-            (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
-              "strong", "tt", "u"), startTagFormatting),
-            ("nobr", startTagNobr),
-            ("button", startTagButton),
-            (("applet", "marquee", "object"), startTagAppletMarqueeObject),
-            ("xmp", startTagXmp),
-            ("table", startTagTable),
-            (("area", "br", "embed", "img", "keygen", "wbr"),
-             startTagVoidFormatting),
-            (("param", "source", "track"), startTagParamSource),
-            ("input", startTagInput),
-            ("hr", startTagHr),
-            ("image", startTagImage),
-            ("isindex", startTagIsIndex),
-            ("textarea", startTagTextarea),
-            ("iframe", startTagIFrame),
-            ("noscript", startTagNoscript),
-            (("noembed", "noframes"), startTagRawtext),
-            ("select", startTagSelect),
-            (("rp", "rt"), startTagRpRt),
-            (("option", "optgroup"), startTagOpt),
-            (("math"), startTagMath),
-            (("svg"), startTagSvg),
-            (("caption", "col", "colgroup", "frame", "head",
-              "tbody", "td", "tfoot", "th", "thead",
-              "tr"), startTagMisplaced)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("body", endTagBody),
-            ("html", endTagHtml),
-            (("address", "article", "aside", "blockquote", "button", "center",
-              "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
-              "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
-              "section", "summary", "ul"), endTagBlock),
-            ("form", endTagForm),
-            ("p", endTagP),
-            (("dd", "dt", "li"), endTagListItem),
-            (headingElements, endTagHeading),
-            (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
-              "strike", "strong", "tt", "u"), endTagFormatting),
-            (("applet", "marquee", "object"), endTagAppletMarqueeObject),
-            ("br", endTagBr),
-        ])
-        endTagHandler.default = endTagOther
-
-    class TextPhase(Phase):
-        __slots__ = tuple()
-
-        def processCharacters(self, token):
-            self.tree.insertText(token["data"])
-
-        def processEOF(self):
-            self.parser.parseError("expected-named-closing-tag-but-got-eof",
-                                   {"name": self.tree.openElements[-1].name})
-            self.tree.openElements.pop()
-            self.parser.phase = self.parser.originalPhase
-            return True
-
-        def startTagOther(self, token):
-            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']
-
-        def endTagScript(self, token):
-            node = self.tree.openElements.pop()
-            assert node.name == "script"
-            self.parser.phase = self.parser.originalPhase
-            # The rest of this method is all stuff that only happens if
-            # document.write works
-
-        def endTagOther(self, token):
-            self.tree.openElements.pop()
-            self.parser.phase = self.parser.originalPhase
-
-        startTagHandler = _utils.MethodDispatcher([])
-        startTagHandler.default = startTagOther
-        endTagHandler = _utils.MethodDispatcher([
-            ("script", endTagScript)])
-        endTagHandler.default = endTagOther
-
-    class InTablePhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
-        __slots__ = tuple()
-
-        # helper methods
-        def clearStackToTableContext(self):
-            # "clear the stack back to a table context"
-            while self.tree.openElements[-1].name not in ("table", "html"):
-                # self.parser.parseError("unexpected-implied-end-tag-in-table",
-                #  {"name":  self.tree.openElements[-1].name})
-                self.tree.openElements.pop()
-            # When the current node is <html> it's an innerHTML case
-
-        # processing methods
-        def processEOF(self):
-            if self.tree.openElements[-1].name != "html":
-                self.parser.parseError("eof-in-table")
-            else:
-                assert self.parser.innerHTML
-            # Stop parsing
-
-        def processSpaceCharacters(self, token):
-            originalPhase = self.parser.phase
-            self.parser.phase = self.parser.phases["inTableText"]
-            self.parser.phase.originalPhase = originalPhase
-            self.parser.phase.processSpaceCharacters(token)
-
-        def processCharacters(self, token):
-            originalPhase = self.parser.phase
-            self.parser.phase = self.parser.phases["inTableText"]
-            self.parser.phase.originalPhase = originalPhase
-            self.parser.phase.processCharacters(token)
-
-        def insertText(self, token):
-            # If we get here there must be at least one non-whitespace character
-            # Do the table magic!
-            self.tree.insertFromTable = True
-            self.parser.phases["inBody"].processCharacters(token)
-            self.tree.insertFromTable = False
-
-        def startTagCaption(self, token):
-            self.clearStackToTableContext()
-            self.tree.activeFormattingElements.append(Marker)
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inCaption"]
-
-        def startTagColgroup(self, token):
-            self.clearStackToTableContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inColumnGroup"]
-
-        def startTagCol(self, token):
-            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
-            return token
-
-        def startTagRowGroup(self, token):
-            self.clearStackToTableContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inTableBody"]
-
-        def startTagImplyTbody(self, token):
-            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
-            return token
-
-        def startTagTable(self, token):
-            self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                                   {"startName": "table", "endName": "table"})
-            self.parser.phase.processEndTag(impliedTagToken("table"))
-            if not self.parser.innerHTML:
-                return token
-
-        def startTagStyleScript(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
-
-        def startTagInput(self, token):
-            if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
-                self.parser.parseError("unexpected-hidden-input-in-table")
-                self.tree.insertElement(token)
-                # XXX associate with form
-                self.tree.openElements.pop()
-            else:
-                self.startTagOther(token)
-
-        def startTagForm(self, token):
-            self.parser.parseError("unexpected-form-in-table")
-            if self.tree.formPointer is None:
-                self.tree.insertElement(token)
-                self.tree.formPointer = self.tree.openElements[-1]
-                self.tree.openElements.pop()
-
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
-            # Do the table magic!
-            self.tree.insertFromTable = True
-            self.parser.phases["inBody"].processStartTag(token)
-            self.tree.insertFromTable = False
-
-        def endTagTable(self, token):
-            if self.tree.elementInScope("table", variant="table"):
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1].name != "table":
-                    self.parser.parseError("end-tag-too-early-named",
-                                           {"gotName": "table",
-                                            "expectedName": self.tree.openElements[-1].name})
-                while self.tree.openElements[-1].name != "table":
-                    self.tree.openElements.pop()
-                self.tree.openElements.pop()
-                self.parser.resetInsertionMode()
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
-            # Do the table magic!
-            self.tree.insertFromTable = True
-            self.parser.phases["inBody"].processEndTag(token)
-            self.tree.insertFromTable = False
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("caption", startTagCaption),
-            ("colgroup", startTagColgroup),
-            ("col", startTagCol),
-            (("tbody", "tfoot", "thead"), startTagRowGroup),
-            (("td", "th", "tr"), startTagImplyTbody),
-            ("table", startTagTable),
-            (("style", "script"), startTagStyleScript),
-            ("input", startTagInput),
-            ("form", startTagForm)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("table", endTagTable),
-            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
-              "tfoot", "th", "thead", "tr"), endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InTableTextPhase(Phase):
-        __slots__ = ("originalPhase", "characterTokens")
-
-        def __init__(self, *args, **kwargs):
-            super(InTableTextPhase, self).__init__(*args, **kwargs)
-            self.originalPhase = None
-            self.characterTokens = []
-
-        def flushCharacters(self):
-            data = "".join([item["data"] for item in self.characterTokens])
-            if any([item not in spaceCharacters for item in data]):
-                token = {"type": tokenTypes["Characters"], "data": data}
-                self.parser.phases["inTable"].insertText(token)
-            elif data:
-                self.tree.insertText(data)
-            self.characterTokens = []
-
-        def processComment(self, token):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
-            return token
-
-        def processEOF(self):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
-            return True
-
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                return
-            self.characterTokens.append(token)
-
-        def processSpaceCharacters(self, token):
-            # pretty sure we should never reach here
-            self.characterTokens.append(token)
-    #        assert False
-
-        def processStartTag(self, token):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
-            return token
-
-        def processEndTag(self, token):
-            self.flushCharacters()
-            self.parser.phase = self.originalPhase
-            return token
-
-    class InCaptionPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
-        __slots__ = tuple()
-
-        def ignoreEndTagCaption(self):
-            return not self.tree.elementInScope("caption", variant="table")
-
-        def processEOF(self):
-            self.parser.phases["inBody"].processEOF()
-
-        def processCharacters(self, token):
-            return self.parser.phases["inBody"].processCharacters(token)
-
-        def startTagTableElement(self, token):
-            self.parser.parseError()
-            # XXX Have to duplicate logic here to find out if the tag is ignored
-            ignoreEndTag = self.ignoreEndTagCaption()
-            self.parser.phase.processEndTag(impliedTagToken("caption"))
-            if not ignoreEndTag:
-                return token
-
-        def startTagOther(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def endTagCaption(self, token):
-            if not self.ignoreEndTagCaption():
-                # AT this code is quite similar to endTagTable in "InTable"
-                self.tree.generateImpliedEndTags()
-                if self.tree.openElements[-1].name != "caption":
-                    self.parser.parseError("expected-one-end-tag-but-got-another",
-                                           {"gotName": "caption",
-                                            "expectedName": self.tree.openElements[-1].name})
-                while self.tree.openElements[-1].name != "caption":
-                    self.tree.openElements.pop()
-                self.tree.openElements.pop()
-                self.tree.clearActiveFormattingElements()
-                self.parser.phase = self.parser.phases["inTable"]
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagTable(self, token):
-            self.parser.parseError()
-            ignoreEndTag = self.ignoreEndTagCaption()
-            self.parser.phase.processEndTag(impliedTagToken("caption"))
-            if not ignoreEndTag:
-                return token
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def endTagOther(self, token):
-            return self.parser.phases["inBody"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-              "thead", "tr"), startTagTableElement)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("caption", endTagCaption),
-            ("table", endTagTable),
-            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
-              "thead", "tr"), endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InColumnGroupPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-column
-        __slots__ = tuple()
-
-        def ignoreEndTagColgroup(self):
-            return self.tree.openElements[-1].name == "html"
-
-        def processEOF(self):
-            if self.tree.openElements[-1].name == "html":
-                assert self.parser.innerHTML
-                return
-            else:
-                ignoreEndTag = self.ignoreEndTagColgroup()
-                self.endTagColgroup(impliedTagToken("colgroup"))
-                if not ignoreEndTag:
-                    return True
-
-        def processCharacters(self, token):
-            ignoreEndTag = self.ignoreEndTagColgroup()
-            self.endTagColgroup(impliedTagToken("colgroup"))
-            if not ignoreEndTag:
-                return token
-
-        def startTagCol(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-            token["selfClosingAcknowledged"] = True
-
-        def startTagOther(self, token):
-            ignoreEndTag = self.ignoreEndTagColgroup()
-            self.endTagColgroup(impliedTagToken("colgroup"))
-            if not ignoreEndTag:
-                return token
-
-        def endTagColgroup(self, token):
-            if self.ignoreEndTagColgroup():
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-            else:
-                self.tree.openElements.pop()
-                self.parser.phase = self.parser.phases["inTable"]
-
-        def endTagCol(self, token):
-            self.parser.parseError("no-end-tag", {"name": "col"})
-
-        def endTagOther(self, token):
-            ignoreEndTag = self.ignoreEndTagColgroup()
-            self.endTagColgroup(impliedTagToken("colgroup"))
-            if not ignoreEndTag:
-                return token
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("col", startTagCol)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("colgroup", endTagColgroup),
-            ("col", endTagCol)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InTableBodyPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
-        __slots__ = tuple()
-
-        # helper methods
-        def clearStackToTableBodyContext(self):
-            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
-                                                          "thead", "html"):
-                # self.parser.parseError("unexpected-implied-end-tag-in-table",
-                #  {"name": self.tree.openElements[-1].name})
-                self.tree.openElements.pop()
-            if self.tree.openElements[-1].name == "html":
-                assert self.parser.innerHTML
-
-        # the rest
-        def processEOF(self):
-            self.parser.phases["inTable"].processEOF()
-
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inTable"].processSpaceCharacters(token)
-
-        def processCharacters(self, token):
-            return self.parser.phases["inTable"].processCharacters(token)
-
-        def startTagTr(self, token):
-            self.clearStackToTableBodyContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inRow"]
-
-        def startTagTableCell(self, token):
-            self.parser.parseError("unexpected-cell-in-table-body",
-                                   {"name": token["name"]})
-            self.startTagTr(impliedTagToken("tr", "StartTag"))
-            return token
-
-        def startTagTableOther(self, token):
-            # XXX AT Any ideas on how to share this with endTagTable?
-            if (self.tree.elementInScope("tbody", variant="table") or
-                self.tree.elementInScope("thead", variant="table") or
-                    self.tree.elementInScope("tfoot", variant="table")):
-                self.clearStackToTableBodyContext()
-                self.endTagTableRowGroup(
-                    impliedTagToken(self.tree.openElements[-1].name))
-                return token
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def startTagOther(self, token):
-            return self.parser.phases["inTable"].processStartTag(token)
-
-        def endTagTableRowGroup(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.clearStackToTableBodyContext()
-                self.tree.openElements.pop()
-                self.parser.phase = self.parser.phases["inTable"]
-            else:
-                self.parser.parseError("unexpected-end-tag-in-table-body",
-                                       {"name": token["name"]})
-
-        def endTagTable(self, token):
-            if (self.tree.elementInScope("tbody", variant="table") or
-                self.tree.elementInScope("thead", variant="table") or
-                    self.tree.elementInScope("tfoot", variant="table")):
-                self.clearStackToTableBodyContext()
-                self.endTagTableRowGroup(
-                    impliedTagToken(self.tree.openElements[-1].name))
-                return token
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag-in-table-body",
-                                   {"name": token["name"]})
-
-        def endTagOther(self, token):
-            return self.parser.phases["inTable"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("tr", startTagTr),
-            (("td", "th"), startTagTableCell),
-            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
-             startTagTableOther)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
-            ("table", endTagTable),
-            (("body", "caption", "col", "colgroup", "html", "td", "th",
-              "tr"), endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InRowPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
-        __slots__ = tuple()
-
-        # helper methods (XXX unify this with other table helper methods)
-        def clearStackToTableRowContext(self):
-            while self.tree.openElements[-1].name not in ("tr", "html"):
-                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
-                                       {"name": self.tree.openElements[-1].name})
-                self.tree.openElements.pop()
-
-        def ignoreEndTagTr(self):
-            return not self.tree.elementInScope("tr", variant="table")
-
-        # the rest
-        def processEOF(self):
-            self.parser.phases["inTable"].processEOF()
-
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inTable"].processSpaceCharacters(token)
-
-        def processCharacters(self, token):
-            return self.parser.phases["inTable"].processCharacters(token)
-
-        def startTagTableCell(self, token):
-            self.clearStackToTableRowContext()
-            self.tree.insertElement(token)
-            self.parser.phase = self.parser.phases["inCell"]
-            self.tree.activeFormattingElements.append(Marker)
-
-        def startTagTableOther(self, token):
-            ignoreEndTag = self.ignoreEndTagTr()
-            self.endTagTr(impliedTagToken("tr"))
-            # XXX how are we sure it's always ignored in the innerHTML case?
-            if not ignoreEndTag:
-                return token
-
-        def startTagOther(self, token):
-            return self.parser.phases["inTable"].processStartTag(token)
-
-        def endTagTr(self, token):
-            if not self.ignoreEndTagTr():
-                self.clearStackToTableRowContext()
-                self.tree.openElements.pop()
-                self.parser.phase = self.parser.phases["inTableBody"]
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagTable(self, token):
-            ignoreEndTag = self.ignoreEndTagTr()
-            self.endTagTr(impliedTagToken("tr"))
-            # Reprocess the current tag if the tr end tag was not ignored
-            # XXX how are we sure it's always ignored in the innerHTML case?
-            if not ignoreEndTag:
-                return token
-
-        def endTagTableRowGroup(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.endTagTr(impliedTagToken("tr"))
-                return token
-            else:
-                self.parser.parseError()
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag-in-table-row",
-                                   {"name": token["name"]})
-
-        def endTagOther(self, token):
-            return self.parser.phases["inTable"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("td", "th"), startTagTableCell),
-            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
-              "tr"), startTagTableOther)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("tr", endTagTr),
-            ("table", endTagTable),
-            (("tbody", "tfoot", "thead"), endTagTableRowGroup),
-            (("body", "caption", "col", "colgroup", "html", "td", "th"),
-             endTagIgnore)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InCellPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
-        __slots__ = tuple()
-
-        # helper
-        def closeCell(self):
-            if self.tree.elementInScope("td", variant="table"):
-                self.endTagTableCell(impliedTagToken("td"))
-            elif self.tree.elementInScope("th", variant="table"):
-                self.endTagTableCell(impliedTagToken("th"))
-
-        # the rest
-        def processEOF(self):
-            self.parser.phases["inBody"].processEOF()
-
-        def processCharacters(self, token):
-            return self.parser.phases["inBody"].processCharacters(token)
-
-        def startTagTableOther(self, token):
-            if (self.tree.elementInScope("td", variant="table") or
-                    self.tree.elementInScope("th", variant="table")):
-                self.closeCell()
-                return token
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def startTagOther(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def endTagTableCell(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.tree.generateImpliedEndTags(token["name"])
-                if self.tree.openElements[-1].name != token["name"]:
-                    self.parser.parseError("unexpected-cell-end-tag",
-                                           {"name": token["name"]})
-                    while True:
-                        node = self.tree.openElements.pop()
-                        if node.name == token["name"]:
-                            break
-                else:
-                    self.tree.openElements.pop()
-                self.tree.clearActiveFormattingElements()
-                self.parser.phase = self.parser.phases["inRow"]
-            else:
-                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def endTagIgnore(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-        def endTagImply(self, token):
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.closeCell()
-                return token
-            else:
-                # sometimes innerHTML case
-                self.parser.parseError()
-
-        def endTagOther(self, token):
-            return self.parser.phases["inBody"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
-              "thead", "tr"), startTagTableOther)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            (("td", "th"), endTagTableCell),
-            (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
-            (("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InSelectPhase(Phase):
-        __slots__ = tuple()
-
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-select
-        def processEOF(self):
-            if self.tree.openElements[-1].name != "html":
-                self.parser.parseError("eof-in-select")
-            else:
-                assert self.parser.innerHTML
-
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                return
-            self.tree.insertText(token["data"])
-
-        def startTagOption(self, token):
-            # We need to imply </option> if <option> is the current node.
-            if self.tree.openElements[-1].name == "option":
-                self.tree.openElements.pop()
-            self.tree.insertElement(token)
-
-        def startTagOptgroup(self, token):
-            if self.tree.openElements[-1].name == "option":
-                self.tree.openElements.pop()
-            if self.tree.openElements[-1].name == "optgroup":
-                self.tree.openElements.pop()
-            self.tree.insertElement(token)
-
-        def startTagSelect(self, token):
-            self.parser.parseError("unexpected-select-in-select")
-            self.endTagSelect(impliedTagToken("select"))
-
-        def startTagInput(self, token):
-            self.parser.parseError("unexpected-input-in-select")
-            if self.tree.elementInScope("select", variant="select"):
-                self.endTagSelect(impliedTagToken("select"))
-                return token
-            else:
-                assert self.parser.innerHTML
-
-        def startTagScript(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
-
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-in-select",
-                                   {"name": token["name"]})
-
-        def endTagOption(self, token):
-            if self.tree.openElements[-1].name == "option":
-                self.tree.openElements.pop()
-            else:
-                self.parser.parseError("unexpected-end-tag-in-select",
-                                       {"name": "option"})
-
-        def endTagOptgroup(self, token):
-            # </optgroup> implicitly closes <option>
-            if (self.tree.openElements[-1].name == "option" and
-                    self.tree.openElements[-2].name == "optgroup"):
-                self.tree.openElements.pop()
-            # It also closes </optgroup>
-            if self.tree.openElements[-1].name == "optgroup":
-                self.tree.openElements.pop()
-            # But nothing else
-            else:
-                self.parser.parseError("unexpected-end-tag-in-select",
-                                       {"name": "optgroup"})
-
-        def endTagSelect(self, token):
-            if self.tree.elementInScope("select", variant="select"):
-                node = self.tree.openElements.pop()
-                while node.name != "select":
-                    node = self.tree.openElements.pop()
-                self.parser.resetInsertionMode()
-            else:
-                # innerHTML case
-                assert self.parser.innerHTML
-                self.parser.parseError()
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-in-select",
-                                   {"name": token["name"]})
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("option", startTagOption),
-            ("optgroup", startTagOptgroup),
-            ("select", startTagSelect),
-            (("input", "keygen", "textarea"), startTagInput),
-            ("script", startTagScript)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("option", endTagOption),
-            ("optgroup", endTagOptgroup),
-            ("select", endTagSelect)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InSelectInTablePhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            self.parser.phases["inSelect"].processEOF()
-
-        def processCharacters(self, token):
-            return self.parser.phases["inSelect"].processCharacters(token)
-
-        def startTagTable(self, token):
-            self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
-            self.endTagOther(impliedTagToken("select"))
-            return token
-
-        def startTagOther(self, token):
-            return self.parser.phases["inSelect"].processStartTag(token)
-
-        def endTagTable(self, token):
-            self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
-            if self.tree.elementInScope(token["name"], variant="table"):
-                self.endTagOther(impliedTagToken("select"))
-                return token
-
-        def endTagOther(self, token):
-            return self.parser.phases["inSelect"].processEndTag(token)
-
-        startTagHandler = _utils.MethodDispatcher([
-            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-             startTagTable)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
-             endTagTable)
-        ])
-        endTagHandler.default = endTagOther
-
-    class InForeignContentPhase(Phase):
-        __slots__ = tuple()
-
-        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
-                                      "center", "code", "dd", "div", "dl", "dt",
-                                      "em", "embed", "h1", "h2", "h3",
-                                      "h4", "h5", "h6", "head", "hr", "i", "img",
-                                      "li", "listing", "menu", "meta", "nobr",
-                                      "ol", "p", "pre", "ruby", "s", "small",
-                                      "span", "strong", "strike", "sub", "sup",
-                                      "table", "tt", "u", "ul", "var"])
-
-        def adjustSVGTagNames(self, token):
-            replacements = {"altglyph": "altGlyph",
-                            "altglyphdef": "altGlyphDef",
-                            "altglyphitem": "altGlyphItem",
-                            "animatecolor": "animateColor",
-                            "animatemotion": "animateMotion",
-                            "animatetransform": "animateTransform",
-                            "clippath": "clipPath",
-                            "feblend": "feBlend",
-                            "fecolormatrix": "feColorMatrix",
-                            "fecomponenttransfer": "feComponentTransfer",
-                            "fecomposite": "feComposite",
-                            "feconvolvematrix": "feConvolveMatrix",
-                            "fediffuselighting": "feDiffuseLighting",
-                            "fedisplacementmap": "feDisplacementMap",
-                            "fedistantlight": "feDistantLight",
-                            "feflood": "feFlood",
-                            "fefunca": "feFuncA",
-                            "fefuncb": "feFuncB",
-                            "fefuncg": "feFuncG",
-                            "fefuncr": "feFuncR",
-                            "fegaussianblur": "feGaussianBlur",
-                            "feimage": "feImage",
-                            "femerge": "feMerge",
-                            "femergenode": "feMergeNode",
-                            "femorphology": "feMorphology",
-                            "feoffset": "feOffset",
-                            "fepointlight": "fePointLight",
-                            "fespecularlighting": "feSpecularLighting",
-                            "fespotlight": "feSpotLight",
-                            "fetile": "feTile",
-                            "feturbulence": "feTurbulence",
-                            "foreignobject": "foreignObject",
-                            "glyphref": "glyphRef",
-                            "lineargradient": "linearGradient",
-                            "radialgradient": "radialGradient",
-                            "textpath": "textPath"}
-
-            if token["name"] in replacements:
-                token["name"] = replacements[token["name"]]
-
-        def processCharacters(self, token):
-            if token["data"] == "\u0000":
-                token["data"] = "\uFFFD"
-            elif (self.parser.framesetOK and
-                  any(char not in spaceCharacters for char in token["data"])):
-                self.parser.framesetOK = False
-            Phase.processCharacters(self, token)
-
-        def processStartTag(self, token):
-            currentNode = self.tree.openElements[-1]
-            if (token["name"] in self.breakoutElements or
-                (token["name"] == "font" and
-                 set(token["data"].keys()) & {"color", "face", "size"})):
-                self.parser.parseError("unexpected-html-element-in-foreign-content",
-                                       {"name": token["name"]})
-                while (self.tree.openElements[-1].namespace !=
-                       self.tree.defaultNamespace and
-                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
-                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
-                    self.tree.openElements.pop()
-                return token
-
-            else:
-                if currentNode.namespace == namespaces["mathml"]:
-                    self.parser.adjustMathMLAttributes(token)
-                elif currentNode.namespace == namespaces["svg"]:
-                    self.adjustSVGTagNames(token)
-                    self.parser.adjustSVGAttributes(token)
-                self.parser.adjustForeignAttributes(token)
-                token["namespace"] = currentNode.namespace
-                self.tree.insertElement(token)
-                if token["selfClosing"]:
-                    self.tree.openElements.pop()
-                    token["selfClosingAcknowledged"] = True
-
-        def processEndTag(self, token):
-            nodeIndex = len(self.tree.openElements) - 1
-            node = self.tree.openElements[-1]
-            if node.name.translate(asciiUpper2Lower) != token["name"]:
-                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
-
-            while True:
-                if node.name.translate(asciiUpper2Lower) == token["name"]:
-                    # XXX this isn't in the spec but it seems necessary
-                    if self.parser.phase == self.parser.phases["inTableText"]:
-                        self.parser.phase.flushCharacters()
-                        self.parser.phase = self.parser.phase.originalPhase
-                    while self.tree.openElements.pop() != node:
-                        assert self.tree.openElements
-                    new_token = None
-                    break
-                nodeIndex -= 1
-
-                node = self.tree.openElements[nodeIndex]
-                if node.namespace != self.tree.defaultNamespace:
-                    continue
-                else:
-                    new_token = self.parser.phase.processEndTag(token)
-                    break
-            return new_token
-
-    class AfterBodyPhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            # Stop parsing
-            pass
-
-        def processComment(self, token):
-            # This is needed because data is to be appended to the <html> element
-            # here and not to whatever is currently open.
-            self.tree.insertComment(token, self.tree.openElements[0])
-
-        def processCharacters(self, token):
-            self.parser.parseError("unexpected-char-after-body")
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
-
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-after-body",
-                                   {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
-
-        def endTagHtml(self, name):
-            if self.parser.innerHTML:
-                self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
-            else:
-                self.parser.phase = self.parser.phases["afterAfterBody"]
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-after-body",
-                                   {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
-        endTagHandler.default = endTagOther
-
-    class InFramesetPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
-        __slots__ = tuple()
-
-        def processEOF(self):
-            if self.tree.openElements[-1].name != "html":
-                self.parser.parseError("eof-in-frameset")
-            else:
-                assert self.parser.innerHTML
-
-        def processCharacters(self, token):
-            self.parser.parseError("unexpected-char-in-frameset")
-
-        def startTagFrameset(self, token):
-            self.tree.insertElement(token)
-
-        def startTagFrame(self, token):
-            self.tree.insertElement(token)
-            self.tree.openElements.pop()
-
-        def startTagNoframes(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-in-frameset",
-                                   {"name": token["name"]})
-
-        def endTagFrameset(self, token):
-            if self.tree.openElements[-1].name == "html":
-                # innerHTML case
-                self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
-            else:
-                self.tree.openElements.pop()
-            if (not self.parser.innerHTML and
-                    self.tree.openElements[-1].name != "frameset"):
-                # If we're not in innerHTML mode and the current node is not a
-                # "frameset" element (anymore) then switch.
-                self.parser.phase = self.parser.phases["afterFrameset"]
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-in-frameset",
-                                   {"name": token["name"]})
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("frameset", startTagFrameset),
-            ("frame", startTagFrame),
-            ("noframes", startTagNoframes)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("frameset", endTagFrameset)
-        ])
-        endTagHandler.default = endTagOther
-
-    class AfterFramesetPhase(Phase):
-        # http://www.whatwg.org/specs/web-apps/current-work/#after3
-        __slots__ = tuple()
-
-        def processEOF(self):
-            # Stop parsing
-            pass
-
-        def processCharacters(self, token):
-            self.parser.parseError("unexpected-char-after-frameset")
-
-        def startTagNoframes(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
-
-        def startTagOther(self, token):
-            self.parser.parseError("unexpected-start-tag-after-frameset",
-                                   {"name": token["name"]})
-
-        def endTagHtml(self, token):
-            self.parser.phase = self.parser.phases["afterAfterFrameset"]
-
-        def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag-after-frameset",
-                                   {"name": token["name"]})
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", Phase.startTagHtml),
-            ("noframes", startTagNoframes)
-        ])
-        startTagHandler.default = startTagOther
-
-        endTagHandler = _utils.MethodDispatcher([
-            ("html", endTagHtml)
-        ])
-        endTagHandler.default = endTagOther
-
-    class AfterAfterBodyPhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            pass
-
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
-
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inBody"].processSpaceCharacters(token)
-
-        def processCharacters(self, token):
-            self.parser.parseError("expected-eof-but-got-char")
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
-
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagOther(self, token):
-            self.parser.parseError("expected-eof-but-got-start-tag",
-                                   {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
-
-        def processEndTag(self, token):
-            self.parser.parseError("expected-eof-but-got-end-tag",
-                                   {"name": token["name"]})
-            self.parser.phase = self.parser.phases["inBody"]
-            return token
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml)
-        ])
-        startTagHandler.default = startTagOther
-
-    class AfterAfterFramesetPhase(Phase):
-        __slots__ = tuple()
-
-        def processEOF(self):
-            pass
-
-        def processComment(self, token):
-            self.tree.insertComment(token, self.tree.document)
-
-        def processSpaceCharacters(self, token):
-            return self.parser.phases["inBody"].processSpaceCharacters(token)
-
-        def processCharacters(self, token):
-            self.parser.parseError("expected-eof-but-got-char")
-
-        def startTagHtml(self, token):
-            return self.parser.phases["inBody"].processStartTag(token)
-
-        def startTagNoFrames(self, token):
-            return self.parser.phases["inHead"].processStartTag(token)
-
-        def startTagOther(self, token):
-            self.parser.parseError("expected-eof-but-got-start-tag",
-                                   {"name": token["name"]})
-
-        def processEndTag(self, token):
-            self.parser.parseError("expected-eof-but-got-end-tag",
-                                   {"name": token["name"]})
-
-        startTagHandler = _utils.MethodDispatcher([
-            ("html", startTagHtml),
-            ("noframes", startTagNoFrames)
-        ])
-        startTagHandler.default = startTagOther
-
-    # pylint:enable=unused-argument
-
-    return {
-        "initial": InitialPhase,
-        "beforeHtml": BeforeHtmlPhase,
-        "beforeHead": BeforeHeadPhase,
-        "inHead": InHeadPhase,
-        "inHeadNoscript": InHeadNoscriptPhase,
-        "afterHead": AfterHeadPhase,
-        "inBody": InBodyPhase,
-        "text": TextPhase,
-        "inTable": InTablePhase,
-        "inTableText": InTableTextPhase,
-        "inCaption": InCaptionPhase,
-        "inColumnGroup": InColumnGroupPhase,
-        "inTableBody": InTableBodyPhase,
-        "inRow": InRowPhase,
-        "inCell": InCellPhase,
-        "inSelect": InSelectPhase,
-        "inSelectInTable": InSelectInTablePhase,
-        "inForeignContent": InForeignContentPhase,
-        "afterBody": AfterBodyPhase,
-        "inFrameset": InFramesetPhase,
-        "afterFrameset": AfterFramesetPhase,
-        "afterAfterBody": AfterAfterBodyPhase,
-        "afterAfterFrameset": AfterAfterFramesetPhase,
-        # XXX after after frameset
-    }
-
-
-def adjust_attributes(token, replacements):
-    needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
-    if needs_adjustment:
-        token['data'] = type(token['data'])((replacements.get(k, k), v)
-                                            for k, v in token['data'].items())
-
-
-def impliedTagToken(name, type="EndTag", attributes=None,
-                    selfClosing=False):
-    if attributes is None:
-        attributes = {}
-    return {"type": tokenTypes[type], "name": name, "data": attributes,
-            "selfClosing": selfClosing}
-
-
-class ParseError(Exception):
-    """Error in parsed document"""
-    pass
diff --git a/src/pip/_vendor/html5lib/serializer.py b/src/pip/_vendor/html5lib/serializer.py
deleted file mode 100644
index d5669d8c1..000000000
--- a/src/pip/_vendor/html5lib/serializer.py
+++ /dev/null
@@ -1,409 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-import re
-
-from codecs import register_error, xmlcharrefreplace_errors
-
-from .constants import voidElements, booleanAttributes, spaceCharacters
-from .constants import rcdataElements, entities, xmlEntities
-from . import treewalkers, _utils
-from xml.sax.saxutils import escape
-
-_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
-_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
-_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
-                                   "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
-                                   "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
-                                   "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-                                   "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
-                                   "\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
-                                   "\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
-                                   "\u3000]")
-
-
-_encode_entity_map = {}
-_is_ucs4 = len("\U0010FFFF") == 1
-for k, v in list(entities.items()):
-    # skip multi-character entities
-    if ((_is_ucs4 and len(v) > 1) or
-            (not _is_ucs4 and len(v) > 2)):
-        continue
-    if v != "&":
-        if len(v) == 2:
-            v = _utils.surrogatePairToCodepoint(v)
-        else:
-            v = ord(v)
-        if v not in _encode_entity_map or k.islower():
-            # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
-            _encode_entity_map[v] = k
-
-
-def htmlentityreplace_errors(exc):
-    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
-        res = []
-        codepoints = []
-        skip = False
-        for i, c in enumerate(exc.object[exc.start:exc.end]):
-            if skip:
-                skip = False
-                continue
-            index = i + exc.start
-            if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
-                codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
-                skip = True
-            else:
-                codepoint = ord(c)
-            codepoints.append(codepoint)
-        for cp in codepoints:
-            e = _encode_entity_map.get(cp)
-            if e:
-                res.append("&")
-                res.append(e)
-                if not e.endswith(";"):
-                    res.append(";")
-            else:
-                res.append("&#x%s;" % (hex(cp)[2:]))
-        return ("".join(res), exc.end)
-    else:
-        return xmlcharrefreplace_errors(exc)
-
-
-register_error("htmlentityreplace", htmlentityreplace_errors)
-
-
-def serialize(input, tree="etree", encoding=None, **serializer_opts):
-    """Serializes the input token stream using the specified treewalker
-
-    :arg input: the token stream to serialize
-
-    :arg tree: the treewalker to use
-
-    :arg encoding: the encoding to use
-
-    :arg serializer_opts: any options to pass to the
-        :py:class:`html5lib.serializer.HTMLSerializer` that gets created
-
-    :returns: the tree serialized as a string
-
-    Example:
-
-    >>> from html5lib.html5parser import parse
-    >>> from html5lib.serializer import serialize
-    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
-    >>> serialize(token_stream, omit_optional_tags=False)
-    '<html><head></head><body><p>Hi!</p></body></html>'
-
-    """
-    # XXX: Should we cache this?
-    walker = treewalkers.getTreeWalker(tree)
-    s = HTMLSerializer(**serializer_opts)
-    return s.render(walker(input), encoding)
-
-
-class HTMLSerializer(object):
-
-    # attribute quoting options
-    quote_attr_values = "legacy"  # be secure by default
-    quote_char = '"'
-    use_best_quote_char = True
-
-    # tag syntax options
-    omit_optional_tags = True
-    minimize_boolean_attributes = True
-    use_trailing_solidus = False
-    space_before_trailing_solidus = True
-
-    # escaping options
-    escape_lt_in_attrs = False
-    escape_rcdata = False
-    resolve_entities = True
-
-    # miscellaneous options
-    alphabetical_attributes = False
-    inject_meta_charset = True
-    strip_whitespace = False
-    sanitize = False
-
-    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
-               "omit_optional_tags", "minimize_boolean_attributes",
-               "use_trailing_solidus", "space_before_trailing_solidus",
-               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
-               "alphabetical_attributes", "inject_meta_charset",
-               "strip_whitespace", "sanitize")
-
-    def __init__(self, **kwargs):
-        """Initialize HTMLSerializer
-
-        :arg inject_meta_charset: Whether or not to inject the meta charset.
-
-            Defaults to ``True``.
-
-        :arg quote_attr_values: Whether to quote attribute values that don't
-            require quoting per legacy browser behavior (``"legacy"``), when
-            required by the standard (``"spec"``), or always (``"always"``).
-
-            Defaults to ``"legacy"``.
-
-        :arg quote_char: Use given quote character for attribute quoting.
-
-            Defaults to ``"`` which will use double quotes unless attribute
-            value contains a double quote, in which case single quotes are
-            used.
-
-        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
-            values.
-
-            Defaults to ``False``.
-
-        :arg escape_rcdata: Whether to escape characters that need to be
-            escaped within normal elements within rcdata elements such as
-            style.
-
-            Defaults to ``False``.
-
-        :arg resolve_entities: Whether to resolve named character entities that
-            appear in the source tree. The XML predefined entities &lt; &gt;
-            &amp; &quot; &apos; are unaffected by this setting.
-
-            Defaults to ``True``.
-
-        :arg strip_whitespace: Whether to remove semantically meaningless
-            whitespace. (This compresses all whitespace to a single space
-            except within ``pre``.)
-
-            Defaults to ``False``.
-
-        :arg minimize_boolean_attributes: Shortens boolean attributes to give
-            just the attribute value, for example::
-
-              <input disabled="disabled">
-
-            becomes::
-
-              <input disabled>
-
-            Defaults to ``True``.
-
-        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
-            start tag of void elements (empty elements whose end tag is
-            forbidden). E.g. ``<hr/>``.
-
-            Defaults to ``False``.
-
-        :arg space_before_trailing_solidus: Places a space immediately before
-            the closing slash in a tag using a trailing solidus. E.g.
-            ``<hr />``. Requires ``use_trailing_solidus=True``.
-
-            Defaults to ``True``.
-
-        :arg sanitize: Strip all unsafe or unknown constructs from output.
-            See :py:class:`html5lib.filters.sanitizer.Filter`.
-
-            Defaults to ``False``.
-
-        :arg omit_optional_tags: Omit start/end tags that are optional.
-
-            Defaults to ``True``.
-
-        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
-
-            Defaults to ``False``.
-
-        """
-        unexpected_args = frozenset(kwargs) - frozenset(self.options)
-        if len(unexpected_args) > 0:
-            raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
-        if 'quote_char' in kwargs:
-            self.use_best_quote_char = False
-        for attr in self.options:
-            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
-        self.errors = []
-        self.strict = False
-
-    def encode(self, string):
-        assert(isinstance(string, text_type))
-        if self.encoding:
-            return string.encode(self.encoding, "htmlentityreplace")
-        else:
-            return string
-
-    def encodeStrict(self, string):
-        assert(isinstance(string, text_type))
-        if self.encoding:
-            return string.encode(self.encoding, "strict")
-        else:
-            return string
-
-    def serialize(self, treewalker, encoding=None):
-        # pylint:disable=too-many-nested-blocks
-        self.encoding = encoding
-        in_cdata = False
-        self.errors = []
-
-        if encoding and self.inject_meta_charset:
-            from .filters.inject_meta_charset import Filter
-            treewalker = Filter(treewalker, encoding)
-        # Alphabetical attributes is here under the assumption that none of
-        # the later filters add or change order of attributes; it needs to be
-        # before the sanitizer so escaped elements come out correctly
-        if self.alphabetical_attributes:
-            from .filters.alphabeticalattributes import Filter
-            treewalker = Filter(treewalker)
-        # WhitespaceFilter should be used before OptionalTagFilter
-        # for maximum efficiently of this latter filter
-        if self.strip_whitespace:
-            from .filters.whitespace import Filter
-            treewalker = Filter(treewalker)
-        if self.sanitize:
-            from .filters.sanitizer import Filter
-            treewalker = Filter(treewalker)
-        if self.omit_optional_tags:
-            from .filters.optionaltags import Filter
-            treewalker = Filter(treewalker)
-
-        for token in treewalker:
-            type = token["type"]
-            if type == "Doctype":
-                doctype = "<!DOCTYPE %s" % token["name"]
-
-                if token["publicId"]:
-                    doctype += ' PUBLIC "%s"' % token["publicId"]
-                elif token["systemId"]:
-                    doctype += " SYSTEM"
-                if token["systemId"]:
-                    if token["systemId"].find('"') >= 0:
-                        if token["systemId"].find("'") >= 0:
-                            self.serializeError("System identifier contains both single and double quote characters")
-                        quote_char = "'"
-                    else:
-                        quote_char = '"'
-                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
-
-                doctype += ">"
-                yield self.encodeStrict(doctype)
-
-            elif type in ("Characters", "SpaceCharacters"):
-                if type == "SpaceCharacters" or in_cdata:
-                    if in_cdata and token["data"].find("</") >= 0:
-                        self.serializeError("Unexpected </ in CDATA")
-                    yield self.encode(token["data"])
-                else:
-                    yield self.encode(escape(token["data"]))
-
-            elif type in ("StartTag", "EmptyTag"):
-                name = token["name"]
-                yield self.encodeStrict("<%s" % name)
-                if name in rcdataElements and not self.escape_rcdata:
-                    in_cdata = True
-                elif in_cdata:
-                    self.serializeError("Unexpected child element of a CDATA element")
-                for (_, attr_name), attr_value in token["data"].items():
-                    # TODO: Add namespace support here
-                    k = attr_name
-                    v = attr_value
-                    yield self.encodeStrict(' ')
-
-                    yield self.encodeStrict(k)
-                    if not self.minimize_boolean_attributes or \
-                        (k not in booleanAttributes.get(name, tuple()) and
-                         k not in booleanAttributes.get("", tuple())):
-                        yield self.encodeStrict("=")
-                        if self.quote_attr_values == "always" or len(v) == 0:
-                            quote_attr = True
-                        elif self.quote_attr_values == "spec":
-                            quote_attr = _quoteAttributeSpec.search(v) is not None
-                        elif self.quote_attr_values == "legacy":
-                            quote_attr = _quoteAttributeLegacy.search(v) is not None
-                        else:
-                            raise ValueError("quote_attr_values must be one of: "
-                                             "'always', 'spec', or 'legacy'")
-                        v = v.replace("&", "&amp;")
-                        if self.escape_lt_in_attrs:
-                            v = v.replace("<", "&lt;")
-                        if quote_attr:
-                            quote_char = self.quote_char
-                            if self.use_best_quote_char:
-                                if "'" in v and '"' not in v:
-                                    quote_char = '"'
-                                elif '"' in v and "'" not in v:
-                                    quote_char = "'"
-                            if quote_char == "'":
-                                v = v.replace("'", "&#39;")
-                            else:
-                                v = v.replace('"', "&quot;")
-                            yield self.encodeStrict(quote_char)
-                            yield self.encode(v)
-                            yield self.encodeStrict(quote_char)
-                        else:
-                            yield self.encode(v)
-                if name in voidElements and self.use_trailing_solidus:
-                    if self.space_before_trailing_solidus:
-                        yield self.encodeStrict(" /")
-                    else:
-                        yield self.encodeStrict("/")
-                yield self.encode(">")
-
-            elif type == "EndTag":
-                name = token["name"]
-                if name in rcdataElements:
-                    in_cdata = False
-                elif in_cdata:
-                    self.serializeError("Unexpected child element of a CDATA element")
-                yield self.encodeStrict("</%s>" % name)
-
-            elif type == "Comment":
-                data = token["data"]
-                if data.find("--") >= 0:
-                    self.serializeError("Comment contains --")
-                yield self.encodeStrict("<!--%s-->" % token["data"])
-
-            elif type == "Entity":
-                name = token["name"]
-                key = name + ";"
-                if key not in entities:
-                    self.serializeError("Entity %s not recognized" % name)
-                if self.resolve_entities and key not in xmlEntities:
-                    data = entities[key]
-                else:
-                    data = "&%s;" % name
-                yield self.encodeStrict(data)
-
-            else:
-                self.serializeError(token["data"])
-
-    def render(self, treewalker, encoding=None):
-        """Serializes the stream from the treewalker into a string
-
-        :arg treewalker: the treewalker to serialize
-
-        :arg encoding: the string encoding to use
-
-        :returns: the serialized tree
-
-        Example:
-
-        >>> from html5lib import parse, getTreeWalker
-        >>> from html5lib.serializer import HTMLSerializer
-        >>> token_stream = parse('<html><body>Hi!</body></html>')
-        >>> walker = getTreeWalker('etree')
-        >>> serializer = HTMLSerializer(omit_optional_tags=False)
-        >>> serializer.render(walker(token_stream))
-        '<html><head></head><body>Hi!</body></html>'
-
-        """
-        if encoding:
-            return b"".join(list(self.serialize(treewalker, encoding)))
-        else:
-            return "".join(list(self.serialize(treewalker)))
-
-    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
-        # XXX The idea is to make data mandatory.
-        self.errors.append(data)
-        if self.strict:
-            raise SerializeError
-
-
-class SerializeError(Exception):
-    """Error in serialized tree"""
-    pass
diff --git a/src/pip/_vendor/html5lib/treeadapters/__init__.py b/src/pip/_vendor/html5lib/treeadapters/__init__.py
deleted file mode 100644
index 7ef59590c..000000000
--- a/src/pip/_vendor/html5lib/treeadapters/__init__.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""Tree adapters let you convert from one tree structure to another
-
-Example:
-
-.. code-block:: python
-
-   from pip._vendor import html5lib
-   from pip._vendor.html5lib.treeadapters import genshi
-
-   doc = '<html><body>Hi!</body></html>'
-   treebuilder = html5lib.getTreeBuilder('etree')
-   parser = html5lib.HTMLParser(tree=treebuilder)
-   tree = parser.parse(doc)
-   TreeWalker = html5lib.getTreeWalker('etree')
-
-   genshi_tree = genshi.to_genshi(TreeWalker(tree))
-
-"""
-from __future__ import absolute_import, division, unicode_literals
-
-from . import sax
-
-__all__ = ["sax"]
-
-try:
-    from . import genshi  # noqa
-except ImportError:
-    pass
-else:
-    __all__.append("genshi")
diff --git a/src/pip/_vendor/html5lib/treeadapters/genshi.py b/src/pip/_vendor/html5lib/treeadapters/genshi.py
deleted file mode 100644
index 61d5fb6ac..000000000
--- a/src/pip/_vendor/html5lib/treeadapters/genshi.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from genshi.core import QName, Attrs
-from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
-
-
-def to_genshi(walker):
-    """Convert a tree to a genshi tree
-
-    :arg walker: the treewalker to use to walk the tree to convert it
-
-    :returns: generator of genshi nodes
-
-    """
-    text = []
-    for token in walker:
-        type = token["type"]
-        if type in ("Characters", "SpaceCharacters"):
-            text.append(token["data"])
-        elif text:
-            yield TEXT, "".join(text), (None, -1, -1)
-            text = []
-
-        if type in ("StartTag", "EmptyTag"):
-            if token["namespace"]:
-                name = "{%s}%s" % (token["namespace"], token["name"])
-            else:
-                name = token["name"]
-            attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
-                           for attr, value in token["data"].items()])
-            yield (START, (QName(name), attrs), (None, -1, -1))
-            if type == "EmptyTag":
-                type = "EndTag"
-
-        if type == "EndTag":
-            if token["namespace"]:
-                name = "{%s}%s" % (token["namespace"], token["name"])
-            else:
-                name = token["name"]
-
-            yield END, QName(name), (None, -1, -1)
-
-        elif type == "Comment":
-            yield COMMENT, token["data"], (None, -1, -1)
-
-        elif type == "Doctype":
-            yield DOCTYPE, (token["name"], token["publicId"],
-                            token["systemId"]), (None, -1, -1)
-
-        else:
-            pass  # FIXME: What to do?
-
-    if text:
-        yield TEXT, "".join(text), (None, -1, -1)
diff --git a/src/pip/_vendor/html5lib/treeadapters/sax.py b/src/pip/_vendor/html5lib/treeadapters/sax.py
deleted file mode 100644
index f4ccea5a2..000000000
--- a/src/pip/_vendor/html5lib/treeadapters/sax.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.sax.xmlreader import AttributesNSImpl
-
-from ..constants import adjustForeignAttributes, unadjustForeignAttributes
-
-prefix_mapping = {}
-for prefix, localName, namespace in adjustForeignAttributes.values():
-    if prefix is not None:
-        prefix_mapping[prefix] = namespace
-
-
-def to_sax(walker, handler):
-    """Call SAX-like content handler based on treewalker walker
-
-    :arg walker: the treewalker to use to walk the tree to convert it
-
-    :arg handler: SAX handler to use
-
-    """
-    handler.startDocument()
-    for prefix, namespace in prefix_mapping.items():
-        handler.startPrefixMapping(prefix, namespace)
-
-    for token in walker:
-        type = token["type"]
-        if type == "Doctype":
-            continue
-        elif type in ("StartTag", "EmptyTag"):
-            attrs = AttributesNSImpl(token["data"],
-                                     unadjustForeignAttributes)
-            handler.startElementNS((token["namespace"], token["name"]),
-                                   token["name"],
-                                   attrs)
-            if type == "EmptyTag":
-                handler.endElementNS((token["namespace"], token["name"]),
-                                     token["name"])
-        elif type == "EndTag":
-            handler.endElementNS((token["namespace"], token["name"]),
-                                 token["name"])
-        elif type in ("Characters", "SpaceCharacters"):
-            handler.characters(token["data"])
-        elif type == "Comment":
-            pass
-        else:
-            assert False, "Unknown token type"
-
-    for prefix, namespace in prefix_mapping.items():
-        handler.endPrefixMapping(prefix)
-    handler.endDocument()
diff --git a/src/pip/_vendor/html5lib/treebuilders/__init__.py b/src/pip/_vendor/html5lib/treebuilders/__init__.py
deleted file mode 100644
index d44447eaf..000000000
--- a/src/pip/_vendor/html5lib/treebuilders/__init__.py
+++ /dev/null
@@ -1,88 +0,0 @@
-"""A collection of modules for building different kinds of trees from HTML
-documents.
-
-To create a treebuilder for a new type of tree, you need to do
-implement several things:
-
-1. A set of classes for various types of elements: Document, Doctype, Comment,
-   Element. These must implement the interface of ``base.treebuilders.Node``
-   (although comment nodes have a different signature for their constructor,
-   see ``treebuilders.etree.Comment``) Textual content may also be implemented
-   as another node type, or not, as your tree implementation requires.
-
-2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits
-   from ``treebuilders.base.TreeBuilder``. This has 4 required attributes:
-
-   * ``documentClass`` - the class to use for the bottommost node of a document
-   * ``elementClass`` - the class to use for HTML Elements
-   * ``commentClass`` - the class to use for comments
-   * ``doctypeClass`` - the class to use for doctypes
-
-   It also has one required method:
-
-   * ``getDocument`` - Returns the root node of the complete document tree
-
-3. If you wish to run the unit tests, you must also create a ``testSerializer``
-   method on your treebuilder which accepts a node and returns a string
-   containing Node and its children serialized according to the format used in
-   the unittests
-
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-from .._utils import default_etree
-
-treeBuilderCache = {}
-
-
-def getTreeBuilder(treeType, implementation=None, **kwargs):
-    """Get a TreeBuilder class for various types of trees with built-in support
-
-    :arg treeType: the name of the tree type required (case-insensitive). Supported
-        values are:
-
-        * "dom" - A generic builder for DOM implementations, defaulting to a
-          xml.dom.minidom based implementation.
-        * "etree" - A generic builder for tree implementations exposing an
-          ElementTree-like interface, defaulting to xml.etree.cElementTree if
-          available and xml.etree.ElementTree if not.
-        * "lxml" - A etree-based builder for lxml.etree, handling limitations
-          of lxml's implementation.
-
-    :arg implementation: (Currently applies to the "etree" and "dom" tree
-        types). A module implementing the tree type e.g. xml.etree.ElementTree
-        or xml.etree.cElementTree.
-
-    :arg kwargs: Any additional options to pass to the TreeBuilder when
-        creating it.
-
-    Example:
-
-    >>> from html5lib.treebuilders import getTreeBuilder
-    >>> builder = getTreeBuilder('etree')
-
-    """
-
-    treeType = treeType.lower()
-    if treeType not in treeBuilderCache:
-        if treeType == "dom":
-            from . import dom
-            # Come up with a sane default (pref. from the stdlib)
-            if implementation is None:
-                from xml.dom import minidom
-                implementation = minidom
-            # NEVER cache here, caching is done in the dom submodule
-            return dom.getDomModule(implementation, **kwargs).TreeBuilder
-        elif treeType == "lxml":
-            from . import etree_lxml
-            treeBuilderCache[treeType] = etree_lxml.TreeBuilder
-        elif treeType == "etree":
-            from . import etree
-            if implementation is None:
-                implementation = default_etree
-            # NEVER cache here, caching is done in the etree submodule
-            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
-        else:
-            raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
-    return treeBuilderCache.get(treeType)
diff --git a/src/pip/_vendor/html5lib/treebuilders/base.py b/src/pip/_vendor/html5lib/treebuilders/base.py
deleted file mode 100644
index 965fce29d..000000000
--- a/src/pip/_vendor/html5lib/treebuilders/base.py
+++ /dev/null
@@ -1,417 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-from ..constants import scopingElements, tableInsertModeElements, namespaces
-
-# The scope markers are inserted when entering object elements,
-# marquees, table cells, and table captions, and are used to prevent formatting
-# from "leaking" into tables, object elements, and marquees.
-Marker = None
-
-listElementsMap = {
-    None: (frozenset(scopingElements), False),
-    "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
-    "list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
-                                          (namespaces["html"], "ul")}), False),
-    "table": (frozenset([(namespaces["html"], "html"),
-                         (namespaces["html"], "table")]), False),
-    "select": (frozenset([(namespaces["html"], "optgroup"),
-                          (namespaces["html"], "option")]), True)
-}
-
-
-class Node(object):
-    """Represents an item in the tree"""
-    def __init__(self, name):
-        """Creates a Node
-
-        :arg name: The tag name associated with the node
-
-        """
-        # The tag name associated with the node
-        self.name = name
-        # The parent of the current node (or None for the document node)
-        self.parent = None
-        # The value of the current node (applies to text nodes and comments)
-        self.value = None
-        # A dict holding name -> value pairs for attributes of the node
-        self.attributes = {}
-        # A list of child nodes of the current node. This must include all
-        # elements but not necessarily other node types.
-        self.childNodes = []
-        # A list of miscellaneous flags that can be set on the node.
-        self._flags = []
-
-    def __str__(self):
-        attributesStr = " ".join(["%s=\"%s\"" % (name, value)
-                                  for name, value in
-                                  self.attributes.items()])
-        if attributesStr:
-            return "<%s %s>" % (self.name, attributesStr)
-        else:
-            return "<%s>" % (self.name)
-
-    def __repr__(self):
-        return "<%s>" % (self.name)
-
-    def appendChild(self, node):
-        """Insert node as a child of the current node
-
-        :arg node: the node to insert
-
-        """
-        raise NotImplementedError
-
-    def insertText(self, data, insertBefore=None):
-        """Insert data as text in the current node, positioned before the
-        start of node insertBefore or to the end of the node's text.
-
-        :arg data: the data to insert
-
-        :arg insertBefore: True if you want to insert the text before the node
-            and False if you want to insert it after the node
-
-        """
-        raise NotImplementedError
-
-    def insertBefore(self, node, refNode):
-        """Insert node as a child of the current node, before refNode in the
-        list of child nodes. Raises ValueError if refNode is not a child of
-        the current node
-
-        :arg node: the node to insert
-
-        :arg refNode: the child node to insert the node before
-
-        """
-        raise NotImplementedError
-
-    def removeChild(self, node):
-        """Remove node from the children of the current node
-
-        :arg node: the child node to remove
-
-        """
-        raise NotImplementedError
-
-    def reparentChildren(self, newParent):
-        """Move all the children of the current node to newParent.
-        This is needed so that trees that don't store text as nodes move the
-        text in the correct way
-
-        :arg newParent: the node to move all this node's children to
-
-        """
-        # XXX - should this method be made more general?
-        for child in self.childNodes:
-            newParent.appendChild(child)
-        self.childNodes = []
-
-    def cloneNode(self):
-        """Return a shallow copy of the current node i.e. a node with the same
-        name and attributes but with no parent or child nodes
-        """
-        raise NotImplementedError
-
-    def hasContent(self):
-        """Return true if the node has children or text, false otherwise
-        """
-        raise NotImplementedError
-
-
-class ActiveFormattingElements(list):
-    def append(self, node):
-        equalCount = 0
-        if node != Marker:
-            for element in self[::-1]:
-                if element == Marker:
-                    break
-                if self.nodesEqual(element, node):
-                    equalCount += 1
-                if equalCount == 3:
-                    self.remove(element)
-                    break
-        list.append(self, node)
-
-    def nodesEqual(self, node1, node2):
-        if not node1.nameTuple == node2.nameTuple:
-            return False
-
-        if not node1.attributes == node2.attributes:
-            return False
-
-        return True
-
-
-class TreeBuilder(object):
-    """Base treebuilder implementation
-
-    * documentClass - the class to use for the bottommost node of a document
-    * elementClass - the class to use for HTML Elements
-    * commentClass - the class to use for comments
-    * doctypeClass - the class to use for doctypes
-
-    """
-    # pylint:disable=not-callable
-
-    # Document class
-    documentClass = None
-
-    # The class to use for creating a node
-    elementClass = None
-
-    # The class to use for creating comments
-    commentClass = None
-
-    # The class to use for creating doctypes
-    doctypeClass = None
-
-    # Fragment class
-    fragmentClass = None
-
-    def __init__(self, namespaceHTMLElements):
-        """Create a TreeBuilder
-
-        :arg namespaceHTMLElements: whether or not to namespace HTML elements
-
-        """
-        if namespaceHTMLElements:
-            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
-        else:
-            self.defaultNamespace = None
-        self.reset()
-
-    def reset(self):
-        self.openElements = []
-        self.activeFormattingElements = ActiveFormattingElements()
-
-        # XXX - rename these to headElement, formElement
-        self.headPointer = None
-        self.formPointer = None
-
-        self.insertFromTable = False
-
-        self.document = self.documentClass()
-
-    def elementInScope(self, target, variant=None):
-
-        # If we pass a node in we match that. if we pass a string
-        # match any node with that name
-        exactNode = hasattr(target, "nameTuple")
-        if not exactNode:
-            if isinstance(target, text_type):
-                target = (namespaces["html"], target)
-            assert isinstance(target, tuple)
-
-        listElements, invert = listElementsMap[variant]
-
-        for node in reversed(self.openElements):
-            if exactNode and node == target:
-                return True
-            elif not exactNode and node.nameTuple == target:
-                return True
-            elif (invert ^ (node.nameTuple in listElements)):
-                return False
-
-        assert False  # We should never reach this point
-
-    def reconstructActiveFormattingElements(self):
-        # Within this algorithm the order of steps described in the
-        # specification is not quite the same as the order of steps in the
-        # code. It should still do the same though.
-
-        # Step 1: stop the algorithm when there's nothing to do.
-        if not self.activeFormattingElements:
-            return
-
-        # Step 2 and step 3: we start with the last element. So i is -1.
-        i = len(self.activeFormattingElements) - 1
-        entry = self.activeFormattingElements[i]
-        if entry == Marker or entry in self.openElements:
-            return
-
-        # Step 6
-        while entry != Marker and entry not in self.openElements:
-            if i == 0:
-                # This will be reset to 0 below
-                i = -1
-                break
-            i -= 1
-            # Step 5: let entry be one earlier in the list.
-            entry = self.activeFormattingElements[i]
-
-        while True:
-            # Step 7
-            i += 1
-
-            # Step 8
-            entry = self.activeFormattingElements[i]
-            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes
-
-            # Step 9
-            element = self.insertElement({"type": "StartTag",
-                                          "name": clone.name,
-                                          "namespace": clone.namespace,
-                                          "data": clone.attributes})
-
-            # Step 10
-            self.activeFormattingElements[i] = element
-
-            # Step 11
-            if element == self.activeFormattingElements[-1]:
-                break
-
-    def clearActiveFormattingElements(self):
-        entry = self.activeFormattingElements.pop()
-        while self.activeFormattingElements and entry != Marker:
-            entry = self.activeFormattingElements.pop()
-
-    def elementInActiveFormattingElements(self, name):
-        """Check if an element exists between the end of the active
-        formatting elements and the last marker. If it does, return it, else
-        return false"""
-
-        for item in self.activeFormattingElements[::-1]:
-            # Check for Marker first because if it's a Marker it doesn't have a
-            # name attribute.
-            if item == Marker:
-                break
-            elif item.name == name:
-                return item
-        return False
-
-    def insertRoot(self, token):
-        element = self.createElement(token)
-        self.openElements.append(element)
-        self.document.appendChild(element)
-
-    def insertDoctype(self, token):
-        name = token["name"]
-        publicId = token["publicId"]
-        systemId = token["systemId"]
-
-        doctype = self.doctypeClass(name, publicId, systemId)
-        self.document.appendChild(doctype)
-
-    def insertComment(self, token, parent=None):
-        if parent is None:
-            parent = self.openElements[-1]
-        parent.appendChild(self.commentClass(token["data"]))
-
-    def createElement(self, token):
-        """Create an element but don't insert it anywhere"""
-        name = token["name"]
-        namespace = token.get("namespace", self.defaultNamespace)
-        element = self.elementClass(name, namespace)
-        element.attributes = token["data"]
-        return element
-
-    def _getInsertFromTable(self):
-        return self._insertFromTable
-
-    def _setInsertFromTable(self, value):
-        """Switch the function used to insert an element from the
-        normal one to the misnested table one and back again"""
-        self._insertFromTable = value
-        if value:
-            self.insertElement = self.insertElementTable
-        else:
-            self.insertElement = self.insertElementNormal
-
-    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
-
-    def insertElementNormal(self, token):
-        name = token["name"]
-        assert isinstance(name, text_type), "Element %s not unicode" % name
-        namespace = token.get("namespace", self.defaultNamespace)
-        element = self.elementClass(name, namespace)
-        element.attributes = token["data"]
-        self.openElements[-1].appendChild(element)
-        self.openElements.append(element)
-        return element
-
-    def insertElementTable(self, token):
-        """Create an element and insert it into the tree"""
-        element = self.createElement(token)
-        if self.openElements[-1].name not in tableInsertModeElements:
-            return self.insertElementNormal(token)
-        else:
-            # We should be in the InTable mode. This means we want to do
-            # special magic element rearranging
-            parent, insertBefore = self.getTableMisnestedNodePosition()
-            if insertBefore is None:
-                parent.appendChild(element)
-            else:
-                parent.insertBefore(element, insertBefore)
-            self.openElements.append(element)
-        return element
-
-    def insertText(self, data, parent=None):
-        """Insert text data."""
-        if parent is None:
-            parent = self.openElements[-1]
-
-        if (not self.insertFromTable or (self.insertFromTable and
-                                         self.openElements[-1].name
-                                         not in tableInsertModeElements)):
-            parent.insertText(data)
-        else:
-            # We should be in the InTable mode. This means we want to do
-            # special magic element rearranging
-            parent, insertBefore = self.getTableMisnestedNodePosition()
-            parent.insertText(data, insertBefore)
-
-    def getTableMisnestedNodePosition(self):
-        """Get the foster parent element, and sibling to insert before
-        (or None) when inserting a misnested table node"""
-        # The foster parent element is the one which comes before the most
-        # recently opened table element
-        # XXX - this is really inelegant
-        lastTable = None
-        fosterParent = None
-        insertBefore = None
-        for elm in self.openElements[::-1]:
-            if elm.name == "table":
-                lastTable = elm
-                break
-        if lastTable:
-            # XXX - we should really check that this parent is actually a
-            # node here
-            if lastTable.parent:
-                fosterParent = lastTable.parent
-                insertBefore = lastTable
-            else:
-                fosterParent = self.openElements[
-                    self.openElements.index(lastTable) - 1]
-        else:
-            fosterParent = self.openElements[0]
-        return fosterParent, insertBefore
-
-    def generateImpliedEndTags(self, exclude=None):
-        name = self.openElements[-1].name
-        # XXX td, th and tr are not actually needed
-        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
-                name != exclude):
-            self.openElements.pop()
-            # XXX This is not entirely what the specification says. We should
-            # investigate it more closely.
-            self.generateImpliedEndTags(exclude)
-
-    def getDocument(self):
-        """Return the final tree"""
-        return self.document
-
-    def getFragment(self):
-        """Return the final fragment"""
-        # assert self.innerHTML
-        fragment = self.fragmentClass()
-        self.openElements[0].reparentChildren(fragment)
-        return fragment
-
-    def testSerializer(self, node):
-        """Serialize the subtree of node in the format required by unit tests
-
-        :arg node: the node from which to start serializing
-
-        """
-        raise NotImplementedError
diff --git a/src/pip/_vendor/html5lib/treebuilders/dom.py b/src/pip/_vendor/html5lib/treebuilders/dom.py
deleted file mode 100644
index d8b530046..000000000
--- a/src/pip/_vendor/html5lib/treebuilders/dom.py
+++ /dev/null
@@ -1,239 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-try:
-    from collections.abc import MutableMapping
-except ImportError:  # Python 2.7
-    from collections import MutableMapping
-from xml.dom import minidom, Node
-import weakref
-
-from . import base
-from .. import constants
-from ..constants import namespaces
-from .._utils import moduleFactoryFactory
-
-
-def getDomBuilder(DomImplementation):
-    Dom = DomImplementation
-
-    class AttrList(MutableMapping):
-        def __init__(self, element):
-            self.element = element
-
-        def __iter__(self):
-            return iter(self.element.attributes.keys())
-
-        def __setitem__(self, name, value):
-            if isinstance(name, tuple):
-                raise NotImplementedError
-            else:
-                attr = self.element.ownerDocument.createAttribute(name)
-                attr.value = value
-                self.element.attributes[name] = attr
-
-        def __len__(self):
-            return len(self.element.attributes)
-
-        def items(self):
-            return list(self.element.attributes.items())
-
-        def values(self):
-            return list(self.element.attributes.values())
-
-        def __getitem__(self, name):
-            if isinstance(name, tuple):
-                raise NotImplementedError
-            else:
-                return self.element.attributes[name].value
-
-        def __delitem__(self, name):
-            if isinstance(name, tuple):
-                raise NotImplementedError
-            else:
-                del self.element.attributes[name]
-
-    class NodeBuilder(base.Node):
-        def __init__(self, element):
-            base.Node.__init__(self, element.nodeName)
-            self.element = element
-
-        namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
-                             self.element.namespaceURI or None)
-
-        def appendChild(self, node):
-            node.parent = self
-            self.element.appendChild(node.element)
-
-        def insertText(self, data, insertBefore=None):
-            text = self.element.ownerDocument.createTextNode(data)
-            if insertBefore:
-                self.element.insertBefore(text, insertBefore.element)
-            else:
-                self.element.appendChild(text)
-
-        def insertBefore(self, node, refNode):
-            self.element.insertBefore(node.element, refNode.element)
-            node.parent = self
-
-        def removeChild(self, node):
-            if node.element.parentNode == self.element:
-                self.element.removeChild(node.element)
-            node.parent = None
-
-        def reparentChildren(self, newParent):
-            while self.element.hasChildNodes():
-                child = self.element.firstChild
-                self.element.removeChild(child)
-                newParent.element.appendChild(child)
-            self.childNodes = []
-
-        def getAttributes(self):
-            return AttrList(self.element)
-
-        def setAttributes(self, attributes):
-            if attributes:
-                for name, value in list(attributes.items()):
-                    if isinstance(name, tuple):
-                        if name[0] is not None:
-                            qualifiedName = (name[0] + ":" + name[1])
-                        else:
-                            qualifiedName = name[1]
-                        self.element.setAttributeNS(name[2], qualifiedName,
-                                                    value)
-                    else:
-                        self.element.setAttribute(
-                            name, value)
-        attributes = property(getAttributes, setAttributes)
-
-        def cloneNode(self):
-            return NodeBuilder(self.element.cloneNode(False))
-
-        def hasContent(self):
-            return self.element.hasChildNodes()
-
-        def getNameTuple(self):
-            if self.namespace is None:
-                return namespaces["html"], self.name
-            else:
-                return self.namespace, self.name
-
-        nameTuple = property(getNameTuple)
-
-    class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
-        def documentClass(self):
-            self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
-            return weakref.proxy(self)
-
-        def insertDoctype(self, token):
-            name = token["name"]
-            publicId = token["publicId"]
-            systemId = token["systemId"]
-
-            domimpl = Dom.getDOMImplementation()
-            doctype = domimpl.createDocumentType(name, publicId, systemId)
-            self.document.appendChild(NodeBuilder(doctype))
-            if Dom == minidom:
-                doctype.ownerDocument = self.dom
-
-        def elementClass(self, name, namespace=None):
-            if namespace is None and self.defaultNamespace is None:
-                node = self.dom.createElement(name)
-            else:
-                node = self.dom.createElementNS(namespace, name)
-
-            return NodeBuilder(node)
-
-        def commentClass(self, data):
-            return NodeBuilder(self.dom.createComment(data))
-
-        def fragmentClass(self):
-            return NodeBuilder(self.dom.createDocumentFragment())
-
-        def appendChild(self, node):
-            self.dom.appendChild(node.element)
-
-        def testSerializer(self, element):
-            return testSerializer(element)
-
-        def getDocument(self):
-            return self.dom
-
-        def getFragment(self):
-            return base.TreeBuilder.getFragment(self).element
-
-        def insertText(self, data, parent=None):
-            data = data
-            if parent != self:
-                base.TreeBuilder.insertText(self, data, parent)
-            else:
-                # HACK: allow text nodes as children of the document node
-                if hasattr(self.dom, '_child_node_types'):
-                    # pylint:disable=protected-access
-                    if Node.TEXT_NODE not in self.dom._child_node_types:
-                        self.dom._child_node_types = list(self.dom._child_node_types)
-                        self.dom._child_node_types.append(Node.TEXT_NODE)
-                self.dom.appendChild(self.dom.createTextNode(data))
-
-        implementation = DomImplementation
-        name = None
-
-    def testSerializer(element):
-        element.normalize()
-        rv = []
-
-        def serializeElement(element, indent=0):
-            if element.nodeType == Node.DOCUMENT_TYPE_NODE:
-                if element.name:
-                    if element.publicId or element.systemId:
-                        publicId = element.publicId or ""
-                        systemId = element.systemId or ""
-                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
-                                  (' ' * indent, element.name, publicId, systemId))
-                    else:
-                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
-                else:
-                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
-            elif element.nodeType == Node.DOCUMENT_NODE:
-                rv.append("#document")
-            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
-                rv.append("#document-fragment")
-            elif element.nodeType == Node.COMMENT_NODE:
-                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
-            elif element.nodeType == Node.TEXT_NODE:
-                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
-            else:
-                if (hasattr(element, "namespaceURI") and
-                        element.namespaceURI is not None):
-                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
-                                      element.nodeName)
-                else:
-                    name = element.nodeName
-                rv.append("|%s<%s>" % (' ' * indent, name))
-                if element.hasAttributes():
-                    attributes = []
-                    for i in range(len(element.attributes)):
-                        attr = element.attributes.item(i)
-                        name = attr.nodeName
-                        value = attr.value
-                        ns = attr.namespaceURI
-                        if ns:
-                            name = "%s %s" % (constants.prefixes[ns], attr.localName)
-                        else:
-                            name = attr.nodeName
-                        attributes.append((name, value))
-
-                    for name, value in sorted(attributes):
-                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
-            indent += 2
-            for child in element.childNodes:
-                serializeElement(child, indent)
-        serializeElement(element, 0)
-
-        return "\n".join(rv)
-
-    return locals()
-
-
-# The actual means to get a module!
-getDomModule = moduleFactoryFactory(getDomBuilder)
diff --git a/src/pip/_vendor/html5lib/treebuilders/etree.py b/src/pip/_vendor/html5lib/treebuilders/etree.py
deleted file mode 100644
index ea92dc301..000000000
--- a/src/pip/_vendor/html5lib/treebuilders/etree.py
+++ /dev/null
@@ -1,343 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-# pylint:disable=protected-access
-
-from pip._vendor.six import text_type
-
-import re
-
-from copy import copy
-
-from . import base
-from .. import _ihatexml
-from .. import constants
-from ..constants import namespaces
-from .._utils import moduleFactoryFactory
-
-tag_regexp = re.compile("{([^}]*)}(.*)")
-
-
-def getETreeBuilder(ElementTreeImplementation, fullTree=False):
-    ElementTree = ElementTreeImplementation
-    ElementTreeCommentType = ElementTree.Comment("asd").tag
-
-    class Element(base.Node):
-        def __init__(self, name, namespace=None):
-            self._name = name
-            self._namespace = namespace
-            self._element = ElementTree.Element(self._getETreeTag(name,
-                                                                  namespace))
-            if namespace is None:
-                self.nameTuple = namespaces["html"], self._name
-            else:
-                self.nameTuple = self._namespace, self._name
-            self.parent = None
-            self._childNodes = []
-            self._flags = []
-
-        def _getETreeTag(self, name, namespace):
-            if namespace is None:
-                etree_tag = name
-            else:
-                etree_tag = "{%s}%s" % (namespace, name)
-            return etree_tag
-
-        def _setName(self, name):
-            self._name = name
-            self._element.tag = self._getETreeTag(self._name, self._namespace)
-
-        def _getName(self):
-            return self._name
-
-        name = property(_getName, _setName)
-
-        def _setNamespace(self, namespace):
-            self._namespace = namespace
-            self._element.tag = self._getETreeTag(self._name, self._namespace)
-
-        def _getNamespace(self):
-            return self._namespace
-
-        namespace = property(_getNamespace, _setNamespace)
-
-        def _getAttributes(self):
-            return self._element.attrib
-
-        def _setAttributes(self, attributes):
-            el_attrib = self._element.attrib
-            el_attrib.clear()
-            if attributes:
-                # calling .items _always_ allocates, and the above truthy check is cheaper than the
-                # allocation on average
-                for key, value in attributes.items():
-                    if isinstance(key, tuple):
-                        name = "{%s}%s" % (key[2], key[1])
-                    else:
-                        name = key
-                    el_attrib[name] = value
-
-        attributes = property(_getAttributes, _setAttributes)
-
-        def _getChildNodes(self):
-            return self._childNodes
-
-        def _setChildNodes(self, value):
-            del self._element[:]
-            self._childNodes = []
-            for element in value:
-                self.insertChild(element)
-
-        childNodes = property(_getChildNodes, _setChildNodes)
-
-        def hasContent(self):
-            """Return true if the node has children or text"""
-            return bool(self._element.text or len(self._element))
-
-        def appendChild(self, node):
-            self._childNodes.append(node)
-            self._element.append(node._element)
-            node.parent = self
-
-        def insertBefore(self, node, refNode):
-            index = list(self._element).index(refNode._element)
-            self._element.insert(index, node._element)
-            node.parent = self
-
-        def removeChild(self, node):
-            self._childNodes.remove(node)
-            self._element.remove(node._element)
-            node.parent = None
-
-        def insertText(self, data, insertBefore=None):
-            if not(len(self._element)):
-                if not self._element.text:
-                    self._element.text = ""
-                self._element.text += data
-            elif insertBefore is None:
-                # Insert the text as the tail of the last child element
-                if not self._element[-1].tail:
-                    self._element[-1].tail = ""
-                self._element[-1].tail += data
-            else:
-                # Insert the text before the specified node
-                children = list(self._element)
-                index = children.index(insertBefore._element)
-                if index > 0:
-                    if not self._element[index - 1].tail:
-                        self._element[index - 1].tail = ""
-                    self._element[index - 1].tail += data
-                else:
-                    if not self._element.text:
-                        self._element.text = ""
-                    self._element.text += data
-
-        def cloneNode(self):
-            element = type(self)(self.name, self.namespace)
-            if self._element.attrib:
-                element._element.attrib = copy(self._element.attrib)
-            return element
-
-        def reparentChildren(self, newParent):
-            if newParent.childNodes:
-                newParent.childNodes[-1]._element.tail += self._element.text
-            else:
-                if not newParent._element.text:
-                    newParent._element.text = ""
-                if self._element.text is not None:
-                    newParent._element.text += self._element.text
-            self._element.text = ""
-            base.Node.reparentChildren(self, newParent)
-
-    class Comment(Element):
-        def __init__(self, data):
-            # Use the superclass constructor to set all properties on the
-            # wrapper element
-            self._element = ElementTree.Comment(data)
-            self.parent = None
-            self._childNodes = []
-            self._flags = []
-
-        def _getData(self):
-            return self._element.text
-
-        def _setData(self, value):
-            self._element.text = value
-
-        data = property(_getData, _setData)
-
-    class DocumentType(Element):
-        def __init__(self, name, publicId, systemId):
-            Element.__init__(self, "<!DOCTYPE>")
-            self._element.text = name
-            self.publicId = publicId
-            self.systemId = systemId
-
-        def _getPublicId(self):
-            return self._element.get("publicId", "")
-
-        def _setPublicId(self, value):
-            if value is not None:
-                self._element.set("publicId", value)
-
-        publicId = property(_getPublicId, _setPublicId)
-
-        def _getSystemId(self):
-            return self._element.get("systemId", "")
-
-        def _setSystemId(self, value):
-            if value is not None:
-                self._element.set("systemId", value)
-
-        systemId = property(_getSystemId, _setSystemId)
-
-    class Document(Element):
-        def __init__(self):
-            Element.__init__(self, "DOCUMENT_ROOT")
-
-    class DocumentFragment(Element):
-        def __init__(self):
-            Element.__init__(self, "DOCUMENT_FRAGMENT")
-
-    def testSerializer(element):
-        rv = []
-
-        def serializeElement(element, indent=0):
-            if not(hasattr(element, "tag")):
-                element = element.getroot()
-            if element.tag == "<!DOCTYPE>":
-                if element.get("publicId") or element.get("systemId"):
-                    publicId = element.get("publicId") or ""
-                    systemId = element.get("systemId") or ""
-                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
-                              (element.text, publicId, systemId))
-                else:
-                    rv.append("<!DOCTYPE %s>" % (element.text,))
-            elif element.tag == "DOCUMENT_ROOT":
-                rv.append("#document")
-                if element.text is not None:
-                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
-                if element.tail is not None:
-                    raise TypeError("Document node cannot have tail")
-                if hasattr(element, "attrib") and len(element.attrib):
-                    raise TypeError("Document node cannot have attributes")
-            elif element.tag == ElementTreeCommentType:
-                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
-            else:
-                assert isinstance(element.tag, text_type), \
-                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
-                nsmatch = tag_regexp.match(element.tag)
-
-                if nsmatch is None:
-                    name = element.tag
-                else:
-                    ns, name = nsmatch.groups()
-                    prefix = constants.prefixes[ns]
-                    name = "%s %s" % (prefix, name)
-                rv.append("|%s<%s>" % (' ' * indent, name))
-
-                if hasattr(element, "attrib"):
-                    attributes = []
-                    for name, value in element.attrib.items():
-                        nsmatch = tag_regexp.match(name)
-                        if nsmatch is not None:
-                            ns, name = nsmatch.groups()
-                            prefix = constants.prefixes[ns]
-                            attr_string = "%s %s" % (prefix, name)
-                        else:
-                            attr_string = name
-                        attributes.append((attr_string, value))
-
-                    for name, value in sorted(attributes):
-                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
-                if element.text:
-                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
-            indent += 2
-            for child in element:
-                serializeElement(child, indent)
-            if element.tail:
-                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
-        serializeElement(element, 0)
-
-        return "\n".join(rv)
-
-    def tostring(element):  # pylint:disable=unused-variable
-        """Serialize an element and its child nodes to a string"""
-        rv = []
-        filter = _ihatexml.InfosetFilter()
-
-        def serializeElement(element):
-            if isinstance(element, ElementTree.ElementTree):
-                element = element.getroot()
-
-            if element.tag == "<!DOCTYPE>":
-                if element.get("publicId") or element.get("systemId"):
-                    publicId = element.get("publicId") or ""
-                    systemId = element.get("systemId") or ""
-                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
-                              (element.text, publicId, systemId))
-                else:
-                    rv.append("<!DOCTYPE %s>" % (element.text,))
-            elif element.tag == "DOCUMENT_ROOT":
-                if element.text is not None:
-                    rv.append(element.text)
-                if element.tail is not None:
-                    raise TypeError("Document node cannot have tail")
-                if hasattr(element, "attrib") and len(element.attrib):
-                    raise TypeError("Document node cannot have attributes")
-
-                for child in element:
-                    serializeElement(child)
-
-            elif element.tag == ElementTreeCommentType:
-                rv.append("<!--%s-->" % (element.text,))
-            else:
-                # This is assumed to be an ordinary element
-                if not element.attrib:
-                    rv.append("<%s>" % (filter.fromXmlName(element.tag),))
-                else:
-                    attr = " ".join(["%s=\"%s\"" % (
-                        filter.fromXmlName(name), value)
-                        for name, value in element.attrib.items()])
-                    rv.append("<%s %s>" % (element.tag, attr))
-                if element.text:
-                    rv.append(element.text)
-
-                for child in element:
-                    serializeElement(child)
-
-                rv.append("</%s>" % (element.tag,))
-
-            if element.tail:
-                rv.append(element.tail)
-
-        serializeElement(element)
-
-        return "".join(rv)
-
-    class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
-        documentClass = Document
-        doctypeClass = DocumentType
-        elementClass = Element
-        commentClass = Comment
-        fragmentClass = DocumentFragment
-        implementation = ElementTreeImplementation
-
-        def testSerializer(self, element):
-            return testSerializer(element)
-
-        def getDocument(self):
-            if fullTree:
-                return self.document._element
-            else:
-                if self.defaultNamespace is not None:
-                    return self.document._element.find(
-                        "{%s}html" % self.defaultNamespace)
-                else:
-                    return self.document._element.find("html")
-
-        def getFragment(self):
-            return base.TreeBuilder.getFragment(self)._element
-
-    return locals()
-
-
-getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/src/pip/_vendor/html5lib/treebuilders/etree_lxml.py b/src/pip/_vendor/html5lib/treebuilders/etree_lxml.py
deleted file mode 100644
index f037759f4..000000000
--- a/src/pip/_vendor/html5lib/treebuilders/etree_lxml.py
+++ /dev/null
@@ -1,392 +0,0 @@
-"""Module for supporting the lxml.etree library. The idea here is to use as much
-of the native library as possible, without using fragile hacks like custom element
-names that break between releases. The downside of this is that we cannot represent
-all possible trees; specifically the following are known to cause problems:
-
-Text or comments as siblings of the root element
-Docypes with no name
-
-When any of these things occur, we emit a DataLossWarning
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-# pylint:disable=protected-access
-
-import warnings
-import re
-import sys
-
-try:
-    from collections.abc import MutableMapping
-except ImportError:
-    from collections import MutableMapping
-
-from . import base
-from ..constants import DataLossWarning
-from .. import constants
-from . import etree as etree_builders
-from .. import _ihatexml
-
-import lxml.etree as etree
-from pip._vendor.six import PY3, binary_type
-
-
-fullTree = True
-tag_regexp = re.compile("{([^}]*)}(.*)")
-
-comment_type = etree.Comment("asd").tag
-
-
-class DocumentType(object):
-    def __init__(self, name, publicId, systemId):
-        self.name = name
-        self.publicId = publicId
-        self.systemId = systemId
-
-
-class Document(object):
-    def __init__(self):
-        self._elementTree = None
-        self._childNodes = []
-
-    def appendChild(self, element):
-        last = self._elementTree.getroot()
-        for last in self._elementTree.getroot().itersiblings():
-            pass
-
-        last.addnext(element._element)
-
-    def _getChildNodes(self):
-        return self._childNodes
-
-    childNodes = property(_getChildNodes)
-
-
-def testSerializer(element):
-    rv = []
-    infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
-
-    def serializeElement(element, indent=0):
-        if not hasattr(element, "tag"):
-            if hasattr(element, "getroot"):
-                # Full tree case
-                rv.append("#document")
-                if element.docinfo.internalDTD:
-                    if not (element.docinfo.public_id or
-                            element.docinfo.system_url):
-                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
-                    else:
-                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
-                            element.docinfo.root_name,
-                            element.docinfo.public_id,
-                            element.docinfo.system_url)
-                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
-                next_element = element.getroot()
-                while next_element.getprevious() is not None:
-                    next_element = next_element.getprevious()
-                while next_element is not None:
-                    serializeElement(next_element, indent + 2)
-                    next_element = next_element.getnext()
-            elif isinstance(element, str) or isinstance(element, bytes):
-                # Text in a fragment
-                assert isinstance(element, str) or sys.version_info[0] == 2
-                rv.append("|%s\"%s\"" % (' ' * indent, element))
-            else:
-                # Fragment case
-                rv.append("#document-fragment")
-                for next_element in element:
-                    serializeElement(next_element, indent + 2)
-        elif element.tag == comment_type:
-            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
-            if hasattr(element, "tail") and element.tail:
-                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
-        else:
-            assert isinstance(element, etree._Element)
-            nsmatch = etree_builders.tag_regexp.match(element.tag)
-            if nsmatch is not None:
-                ns = nsmatch.group(1)
-                tag = nsmatch.group(2)
-                prefix = constants.prefixes[ns]
-                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
-                                          infosetFilter.fromXmlName(tag)))
-            else:
-                rv.append("|%s<%s>" % (' ' * indent,
-                                       infosetFilter.fromXmlName(element.tag)))
-
-            if hasattr(element, "attrib"):
-                attributes = []
-                for name, value in element.attrib.items():
-                    nsmatch = tag_regexp.match(name)
-                    if nsmatch is not None:
-                        ns, name = nsmatch.groups()
-                        name = infosetFilter.fromXmlName(name)
-                        prefix = constants.prefixes[ns]
-                        attr_string = "%s %s" % (prefix, name)
-                    else:
-                        attr_string = infosetFilter.fromXmlName(name)
-                    attributes.append((attr_string, value))
-
-                for name, value in sorted(attributes):
-                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
-
-            if element.text:
-                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
-            indent += 2
-            for child in element:
-                serializeElement(child, indent)
-            if hasattr(element, "tail") and element.tail:
-                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
-    serializeElement(element, 0)
-
-    return "\n".join(rv)
-
-
-def tostring(element):
-    """Serialize an element and its child nodes to a string"""
-    rv = []
-
-    def serializeElement(element):
-        if not hasattr(element, "tag"):
-            if element.docinfo.internalDTD:
-                if element.docinfo.doctype:
-                    dtd_str = element.docinfo.doctype
-                else:
-                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
-                rv.append(dtd_str)
-            serializeElement(element.getroot())
-
-        elif element.tag == comment_type:
-            rv.append("<!--%s-->" % (element.text,))
-
-        else:
-            # This is assumed to be an ordinary element
-            if not element.attrib:
-                rv.append("<%s>" % (element.tag,))
-            else:
-                attr = " ".join(["%s=\"%s\"" % (name, value)
-                                 for name, value in element.attrib.items()])
-                rv.append("<%s %s>" % (element.tag, attr))
-            if element.text:
-                rv.append(element.text)
-
-            for child in element:
-                serializeElement(child)
-
-            rv.append("</%s>" % (element.tag,))
-
-        if hasattr(element, "tail") and element.tail:
-            rv.append(element.tail)
-
-    serializeElement(element)
-
-    return "".join(rv)
-
-
-class TreeBuilder(base.TreeBuilder):
-    documentClass = Document
-    doctypeClass = DocumentType
-    elementClass = None
-    commentClass = None
-    fragmentClass = Document
-    implementation = etree
-
-    def __init__(self, namespaceHTMLElements, fullTree=False):
-        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
-        infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
-        self.namespaceHTMLElements = namespaceHTMLElements
-
-        class Attributes(MutableMapping):
-            def __init__(self, element):
-                self._element = element
-
-            def _coerceKey(self, key):
-                if isinstance(key, tuple):
-                    name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
-                else:
-                    name = infosetFilter.coerceAttribute(key)
-                return name
-
-            def __getitem__(self, key):
-                value = self._element._element.attrib[self._coerceKey(key)]
-                if not PY3 and isinstance(value, binary_type):
-                    value = value.decode("ascii")
-                return value
-
-            def __setitem__(self, key, value):
-                self._element._element.attrib[self._coerceKey(key)] = value
-
-            def __delitem__(self, key):
-                del self._element._element.attrib[self._coerceKey(key)]
-
-            def __iter__(self):
-                return iter(self._element._element.attrib)
-
-            def __len__(self):
-                return len(self._element._element.attrib)
-
-            def clear(self):
-                return self._element._element.attrib.clear()
-
-        class Element(builder.Element):
-            def __init__(self, name, namespace):
-                name = infosetFilter.coerceElement(name)
-                builder.Element.__init__(self, name, namespace=namespace)
-                self._attributes = Attributes(self)
-
-            def _setName(self, name):
-                self._name = infosetFilter.coerceElement(name)
-                self._element.tag = self._getETreeTag(
-                    self._name, self._namespace)
-
-            def _getName(self):
-                return infosetFilter.fromXmlName(self._name)
-
-            name = property(_getName, _setName)
-
-            def _getAttributes(self):
-                return self._attributes
-
-            def _setAttributes(self, value):
-                attributes = self.attributes
-                attributes.clear()
-                attributes.update(value)
-
-            attributes = property(_getAttributes, _setAttributes)
-
-            def insertText(self, data, insertBefore=None):
-                data = infosetFilter.coerceCharacters(data)
-                builder.Element.insertText(self, data, insertBefore)
-
-            def cloneNode(self):
-                element = type(self)(self.name, self.namespace)
-                if self._element.attrib:
-                    element._element.attrib.update(self._element.attrib)
-                return element
-
-        class Comment(builder.Comment):
-            def __init__(self, data):
-                data = infosetFilter.coerceComment(data)
-                builder.Comment.__init__(self, data)
-
-            def _setData(self, data):
-                data = infosetFilter.coerceComment(data)
-                self._element.text = data
-
-            def _getData(self):
-                return self._element.text
-
-            data = property(_getData, _setData)
-
-        self.elementClass = Element
-        self.commentClass = Comment
-        # self.fragmentClass = builder.DocumentFragment
-        base.TreeBuilder.__init__(self, namespaceHTMLElements)
-
-    def reset(self):
-        base.TreeBuilder.reset(self)
-        self.insertComment = self.insertCommentInitial
-        self.initial_comments = []
-        self.doctype = None
-
-    def testSerializer(self, element):
-        return testSerializer(element)
-
-    def getDocument(self):
-        if fullTree:
-            return self.document._elementTree
-        else:
-            return self.document._elementTree.getroot()
-
-    def getFragment(self):
-        fragment = []
-        element = self.openElements[0]._element
-        if element.text:
-            fragment.append(element.text)
-        fragment.extend(list(element))
-        if element.tail:
-            fragment.append(element.tail)
-        return fragment
-
-    def insertDoctype(self, token):
-        name = token["name"]
-        publicId = token["publicId"]
-        systemId = token["systemId"]
-
-        if not name:
-            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
-            self.doctype = None
-        else:
-            coercedName = self.infosetFilter.coerceElement(name)
-            if coercedName != name:
-                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)
-
-            doctype = self.doctypeClass(coercedName, publicId, systemId)
-            self.doctype = doctype
-
-    def insertCommentInitial(self, data, parent=None):
-        assert parent is None or parent is self.document
-        assert self.document._elementTree is None
-        self.initial_comments.append(data)
-
-    def insertCommentMain(self, data, parent=None):
-        if (parent == self.document and
-                self.document._elementTree.getroot()[-1].tag == comment_type):
-            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
-        super(TreeBuilder, self).insertComment(data, parent)
-
-    def insertRoot(self, token):
-        # Because of the way libxml2 works, it doesn't seem to be possible to
-        # alter information like the doctype after the tree has been parsed.
-        # Therefore we need to use the built-in parser to create our initial
-        # tree, after which we can add elements like normal
-        docStr = ""
-        if self.doctype:
-            assert self.doctype.name
-            docStr += "<!DOCTYPE %s" % self.doctype.name
-            if (self.doctype.publicId is not None or
-                    self.doctype.systemId is not None):
-                docStr += (' PUBLIC "%s" ' %
-                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
-                if self.doctype.systemId:
-                    sysid = self.doctype.systemId
-                    if sysid.find("'") >= 0 and sysid.find('"') >= 0:
-                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
-                        sysid = sysid.replace("'", 'U00027')
-                    if sysid.find("'") >= 0:
-                        docStr += '"%s"' % sysid
-                    else:
-                        docStr += "'%s'" % sysid
-                else:
-                    docStr += "''"
-            docStr += ">"
-            if self.doctype.name != token["name"]:
-                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
-        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
-        root = etree.fromstring(docStr)
-
-        # Append the initial comments:
-        for comment_token in self.initial_comments:
-            comment = self.commentClass(comment_token["data"])
-            root.addprevious(comment._element)
-
-        # Create the root document and add the ElementTree to it
-        self.document = self.documentClass()
-        self.document._elementTree = root.getroottree()
-
-        # Give the root element the right name
-        name = token["name"]
-        namespace = token.get("namespace", self.defaultNamespace)
-        if namespace is None:
-            etree_tag = name
-        else:
-            etree_tag = "{%s}%s" % (namespace, name)
-        root.tag = etree_tag
-
-        # Add the root element to the internal child/open data structures
-        root_element = self.elementClass(name, namespace)
-        root_element._element = root
-        self.document._childNodes.append(root_element)
-        self.openElements.append(root_element)
-
-        # Reset to the default insert comment function
-        self.insertComment = self.insertCommentMain
diff --git a/src/pip/_vendor/html5lib/treewalkers/__init__.py b/src/pip/_vendor/html5lib/treewalkers/__init__.py
deleted file mode 100644
index b2d3aac31..000000000
--- a/src/pip/_vendor/html5lib/treewalkers/__init__.py
+++ /dev/null
@@ -1,154 +0,0 @@
-"""A collection of modules for iterating through different kinds of
-tree, generating tokens identical to those produced by the tokenizer
-module.
-
-To create a tree walker for a new type of tree, you need to
-implement a tree walker object (called TreeWalker by convention) that
-implements a 'serialize' method which takes a tree as sole argument and
-returns an iterator which generates tokens.
-"""
-
-from __future__ import absolute_import, division, unicode_literals
-
-from .. import constants
-from .._utils import default_etree
-
-__all__ = ["getTreeWalker", "pprint"]
-
-treeWalkerCache = {}
-
-
-def getTreeWalker(treeType, implementation=None, **kwargs):
-    """Get a TreeWalker class for various types of tree with built-in support
-
-    :arg str treeType: the name of the tree type required (case-insensitive).
-        Supported values are:
-
-        * "dom": The xml.dom.minidom DOM implementation
-        * "etree": A generic walker for tree implementations exposing an
-          elementtree-like interface (known to work with ElementTree,
-          cElementTree and lxml.etree).
-        * "lxml": Optimized walker for lxml.etree
-        * "genshi": a Genshi stream
-
-    :arg implementation: A module implementing the tree type e.g.
-        xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
-        tree type only).
-
-    :arg kwargs: keyword arguments passed to the etree walker--for other
-        walkers, this has no effect
-
-    :returns: a TreeWalker class
-
-    """
-
-    treeType = treeType.lower()
-    if treeType not in treeWalkerCache:
-        if treeType == "dom":
-            from . import dom
-            treeWalkerCache[treeType] = dom.TreeWalker
-        elif treeType == "genshi":
-            from . import genshi
-            treeWalkerCache[treeType] = genshi.TreeWalker
-        elif treeType == "lxml":
-            from . import etree_lxml
-            treeWalkerCache[treeType] = etree_lxml.TreeWalker
-        elif treeType == "etree":
-            from . import etree
-            if implementation is None:
-                implementation = default_etree
-            # XXX: NEVER cache here, caching is done in the etree submodule
-            return etree.getETreeModule(implementation, **kwargs).TreeWalker
-    return treeWalkerCache.get(treeType)
-
-
-def concatenateCharacterTokens(tokens):
-    pendingCharacters = []
-    for token in tokens:
-        type = token["type"]
-        if type in ("Characters", "SpaceCharacters"):
-            pendingCharacters.append(token["data"])
-        else:
-            if pendingCharacters:
-                yield {"type": "Characters", "data": "".join(pendingCharacters)}
-                pendingCharacters = []
-            yield token
-    if pendingCharacters:
-        yield {"type": "Characters", "data": "".join(pendingCharacters)}
-
-
-def pprint(walker):
-    """Pretty printer for tree walkers
-
-    Takes a TreeWalker instance and pretty prints the output of walking the tree.
-
-    :arg walker: a TreeWalker instance
-
-    """
-    output = []
-    indent = 0
-    for token in concatenateCharacterTokens(walker):
-        type = token["type"]
-        if type in ("StartTag", "EmptyTag"):
-            # tag name
-            if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
-                if token["namespace"] in constants.prefixes:
-                    ns = constants.prefixes[token["namespace"]]
-                else:
-                    ns = token["namespace"]
-                name = "%s %s" % (ns, token["name"])
-            else:
-                name = token["name"]
-            output.append("%s<%s>" % (" " * indent, name))
-            indent += 2
-            # attributes (sorted for consistent ordering)
-            attrs = token["data"]
-            for (namespace, localname), value in sorted(attrs.items()):
-                if namespace:
-                    if namespace in constants.prefixes:
-                        ns = constants.prefixes[namespace]
-                    else:
-                        ns = namespace
-                    name = "%s %s" % (ns, localname)
-                else:
-                    name = localname
-                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
-            # self-closing
-            if type == "EmptyTag":
-                indent -= 2
-
-        elif type == "EndTag":
-            indent -= 2
-
-        elif type == "Comment":
-            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
-
-        elif type == "Doctype":
-            if token["name"]:
-                if token["publicId"]:
-                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
-                                  (" " * indent,
-                                   token["name"],
-                                   token["publicId"],
-                                   token["systemId"] if token["systemId"] else ""))
-                elif token["systemId"]:
-                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
-                                  (" " * indent,
-                                   token["name"],
-                                   token["systemId"]))
-                else:
-                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
-                                                       token["name"]))
-            else:
-                output.append("%s<!DOCTYPE >" % (" " * indent,))
-
-        elif type == "Characters":
-            output.append("%s\"%s\"" % (" " * indent, token["data"]))
-
-        elif type == "SpaceCharacters":
-            assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
-
-        else:
-            raise ValueError("Unknown token type, %s" % type)
-
-    return "\n".join(output)
diff --git a/src/pip/_vendor/html5lib/treewalkers/base.py b/src/pip/_vendor/html5lib/treewalkers/base.py
deleted file mode 100644
index 80c474c4e..000000000
--- a/src/pip/_vendor/html5lib/treewalkers/base.py
+++ /dev/null
@@ -1,252 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom import Node
-from ..constants import namespaces, voidElements, spaceCharacters
-
-__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
-           "TreeWalker", "NonRecursiveTreeWalker"]
-
-DOCUMENT = Node.DOCUMENT_NODE
-DOCTYPE = Node.DOCUMENT_TYPE_NODE
-TEXT = Node.TEXT_NODE
-ELEMENT = Node.ELEMENT_NODE
-COMMENT = Node.COMMENT_NODE
-ENTITY = Node.ENTITY_NODE
-UNKNOWN = "<#UNKNOWN#>"
-
-spaceCharacters = "".join(spaceCharacters)
-
-
-class TreeWalker(object):
-    """Walks a tree yielding tokens
-
-    Tokens are dicts that all have a ``type`` field specifying the type of the
-    token.
-
-    """
-    def __init__(self, tree):
-        """Creates a TreeWalker
-
-        :arg tree: the tree to walk
-
-        """
-        self.tree = tree
-
-    def __iter__(self):
-        raise NotImplementedError
-
-    def error(self, msg):
-        """Generates an error token with the given message
-
-        :arg msg: the error message
-
-        :returns: SerializeError token
-
-        """
-        return {"type": "SerializeError", "data": msg}
-
-    def emptyTag(self, namespace, name, attrs, hasChildren=False):
-        """Generates an EmptyTag token
-
-        :arg namespace: the namespace of the token--can be ``None``
-
-        :arg name: the name of the element
-
-        :arg attrs: the attributes of the element as a dict
-
-        :arg hasChildren: whether or not to yield a SerializationError because
-            this tag shouldn't have children
-
-        :returns: EmptyTag token
-
-        """
-        yield {"type": "EmptyTag", "name": name,
-               "namespace": namespace,
-               "data": attrs}
-        if hasChildren:
-            yield self.error("Void element has children")
-
-    def startTag(self, namespace, name, attrs):
-        """Generates a StartTag token
-
-        :arg namespace: the namespace of the token--can be ``None``
-
-        :arg name: the name of the element
-
-        :arg attrs: the attributes of the element as a dict
-
-        :returns: StartTag token
-
-        """
-        return {"type": "StartTag",
-                "name": name,
-                "namespace": namespace,
-                "data": attrs}
-
-    def endTag(self, namespace, name):
-        """Generates an EndTag token
-
-        :arg namespace: the namespace of the token--can be ``None``
-
-        :arg name: the name of the element
-
-        :returns: EndTag token
-
-        """
-        return {"type": "EndTag",
-                "name": name,
-                "namespace": namespace}
-
-    def text(self, data):
-        """Generates SpaceCharacters and Characters tokens
-
-        Depending on what's in the data, this generates one or more
-        ``SpaceCharacters`` and ``Characters`` tokens.
-
-        For example:
-
-            >>> from html5lib.treewalkers.base import TreeWalker
-            >>> # Give it an empty tree just so it instantiates
-            >>> walker = TreeWalker([])
-            >>> list(walker.text(''))
-            []
-            >>> list(walker.text('  '))
-            [{u'data': '  ', u'type': u'SpaceCharacters'}]
-            >>> list(walker.text(' abc '))  # doctest: +NORMALIZE_WHITESPACE
-            [{u'data': ' ', u'type': u'SpaceCharacters'},
-            {u'data': u'abc', u'type': u'Characters'},
-            {u'data': u' ', u'type': u'SpaceCharacters'}]
-
-        :arg data: the text data
-
-        :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
-
-        """
-        data = data
-        middle = data.lstrip(spaceCharacters)
-        left = data[:len(data) - len(middle)]
-        if left:
-            yield {"type": "SpaceCharacters", "data": left}
-        data = middle
-        middle = data.rstrip(spaceCharacters)
-        right = data[len(middle):]
-        if middle:
-            yield {"type": "Characters", "data": middle}
-        if right:
-            yield {"type": "SpaceCharacters", "data": right}
-
-    def comment(self, data):
-        """Generates a Comment token
-
-        :arg data: the comment
-
-        :returns: Comment token
-
-        """
-        return {"type": "Comment", "data": data}
-
-    def doctype(self, name, publicId=None, systemId=None):
-        """Generates a Doctype token
-
-        :arg name:
-
-        :arg publicId:
-
-        :arg systemId:
-
-        :returns: the Doctype token
-
-        """
-        return {"type": "Doctype",
-                "name": name,
-                "publicId": publicId,
-                "systemId": systemId}
-
-    def entity(self, name):
-        """Generates an Entity token
-
-        :arg name: the entity name
-
-        :returns: an Entity token
-
-        """
-        return {"type": "Entity", "name": name}
-
-    def unknown(self, nodeType):
-        """Handles unknown node types"""
-        return self.error("Unknown node type: " + nodeType)
-
-
-class NonRecursiveTreeWalker(TreeWalker):
-    def getNodeDetails(self, node):
-        raise NotImplementedError
-
-    def getFirstChild(self, node):
-        raise NotImplementedError
-
-    def getNextSibling(self, node):
-        raise NotImplementedError
-
-    def getParentNode(self, node):
-        raise NotImplementedError
-
-    def __iter__(self):
-        currentNode = self.tree
-        while currentNode is not None:
-            details = self.getNodeDetails(currentNode)
-            type, details = details[0], details[1:]
-            hasChildren = False
-
-            if type == DOCTYPE:
-                yield self.doctype(*details)
-
-            elif type == TEXT:
-                for token in self.text(*details):
-                    yield token
-
-            elif type == ELEMENT:
-                namespace, name, attributes, hasChildren = details
-                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
-                    for token in self.emptyTag(namespace, name, attributes,
-                                               hasChildren):
-                        yield token
-                    hasChildren = False
-                else:
-                    yield self.startTag(namespace, name, attributes)
-
-            elif type == COMMENT:
-                yield self.comment(details[0])
-
-            elif type == ENTITY:
-                yield self.entity(details[0])
-
-            elif type == DOCUMENT:
-                hasChildren = True
-
-            else:
-                yield self.unknown(details[0])
-
-            if hasChildren:
-                firstChild = self.getFirstChild(currentNode)
-            else:
-                firstChild = None
-
-            if firstChild is not None:
-                currentNode = firstChild
-            else:
-                while currentNode is not None:
-                    details = self.getNodeDetails(currentNode)
-                    type, details = details[0], details[1:]
-                    if type == ELEMENT:
-                        namespace, name, attributes, hasChildren = details
-                        if (namespace and namespace != namespaces["html"]) or name not in voidElements:
-                            yield self.endTag(namespace, name)
-                    if self.tree is currentNode:
-                        currentNode = None
-                        break
-                    nextSibling = self.getNextSibling(currentNode)
-                    if nextSibling is not None:
-                        currentNode = nextSibling
-                        break
-                    else:
-                        currentNode = self.getParentNode(currentNode)
diff --git a/src/pip/_vendor/html5lib/treewalkers/dom.py b/src/pip/_vendor/html5lib/treewalkers/dom.py
deleted file mode 100644
index b0c89b001..000000000
--- a/src/pip/_vendor/html5lib/treewalkers/dom.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from xml.dom import Node
-
-from . import base
-
-
-class TreeWalker(base.NonRecursiveTreeWalker):
-    def getNodeDetails(self, node):
-        if node.nodeType == Node.DOCUMENT_TYPE_NODE:
-            return base.DOCTYPE, node.name, node.publicId, node.systemId
-
-        elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
-            return base.TEXT, node.nodeValue
-
-        elif node.nodeType == Node.ELEMENT_NODE:
-            attrs = {}
-            for attr in list(node.attributes.keys()):
-                attr = node.getAttributeNode(attr)
-                if attr.namespaceURI:
-                    attrs[(attr.namespaceURI, attr.localName)] = attr.value
-                else:
-                    attrs[(None, attr.name)] = attr.value
-            return (base.ELEMENT, node.namespaceURI, node.nodeName,
-                    attrs, node.hasChildNodes())
-
-        elif node.nodeType == Node.COMMENT_NODE:
-            return base.COMMENT, node.nodeValue
-
-        elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
-            return (base.DOCUMENT,)
-
-        else:
-            return base.UNKNOWN, node.nodeType
-
-    def getFirstChild(self, node):
-        return node.firstChild
-
-    def getNextSibling(self, node):
-        return node.nextSibling
-
-    def getParentNode(self, node):
-        return node.parentNode
diff --git a/src/pip/_vendor/html5lib/treewalkers/etree.py b/src/pip/_vendor/html5lib/treewalkers/etree.py
deleted file mode 100644
index 837b27ec4..000000000
--- a/src/pip/_vendor/html5lib/treewalkers/etree.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from collections import OrderedDict
-import re
-
-from pip._vendor.six import string_types
-
-from . import base
-from .._utils import moduleFactoryFactory
-
-tag_regexp = re.compile("{([^}]*)}(.*)")
-
-
-def getETreeBuilder(ElementTreeImplementation):
-    ElementTree = ElementTreeImplementation
-    ElementTreeCommentType = ElementTree.Comment("asd").tag
-
-    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
-        """Given the particular ElementTree representation, this implementation,
-        to avoid using recursion, returns "nodes" as tuples with the following
-        content:
-
-        1. The current element
-
-        2. The index of the element relative to its parent
-
-        3. A stack of ancestor elements
-
-        4. A flag "text", "tail" or None to indicate if the current node is a
-           text node; either the text or tail of the current element (1)
-        """
-        def getNodeDetails(self, node):
-            if isinstance(node, tuple):  # It might be the root Element
-                elt, _, _, flag = node
-                if flag in ("text", "tail"):
-                    return base.TEXT, getattr(elt, flag)
-                else:
-                    node = elt
-
-            if not(hasattr(node, "tag")):
-                node = node.getroot()
-
-            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
-                return (base.DOCUMENT,)
-
-            elif node.tag == "<!DOCTYPE>":
-                return (base.DOCTYPE, node.text,
-                        node.get("publicId"), node.get("systemId"))
-
-            elif node.tag == ElementTreeCommentType:
-                return base.COMMENT, node.text
-
-            else:
-                assert isinstance(node.tag, string_types), type(node.tag)
-                # This is assumed to be an ordinary element
-                match = tag_regexp.match(node.tag)
-                if match:
-                    namespace, tag = match.groups()
-                else:
-                    namespace = None
-                    tag = node.tag
-                attrs = OrderedDict()
-                for name, value in list(node.attrib.items()):
-                    match = tag_regexp.match(name)
-                    if match:
-                        attrs[(match.group(1), match.group(2))] = value
-                    else:
-                        attrs[(None, name)] = value
-                return (base.ELEMENT, namespace, tag,
-                        attrs, len(node) or node.text)
-
-        def getFirstChild(self, node):
-            if isinstance(node, tuple):
-                element, key, parents, flag = node
-            else:
-                element, key, parents, flag = node, None, [], None
-
-            if flag in ("text", "tail"):
-                return None
-            else:
-                if element.text:
-                    return element, key, parents, "text"
-                elif len(element):
-                    parents.append(element)
-                    return element[0], 0, parents, None
-                else:
-                    return None
-
-        def getNextSibling(self, node):
-            if isinstance(node, tuple):
-                element, key, parents, flag = node
-            else:
-                return None
-
-            if flag == "text":
-                if len(element):
-                    parents.append(element)
-                    return element[0], 0, parents, None
-                else:
-                    return None
-            else:
-                if element.tail and flag != "tail":
-                    return element, key, parents, "tail"
-                elif key < len(parents[-1]) - 1:
-                    return parents[-1][key + 1], key + 1, parents, None
-                else:
-                    return None
-
-        def getParentNode(self, node):
-            if isinstance(node, tuple):
-                element, key, parents, flag = node
-            else:
-                return None
-
-            if flag == "text":
-                if not parents:
-                    return element
-                else:
-                    return element, key, parents, None
-            else:
-                parent = parents.pop()
-                if not parents:
-                    return parent
-                else:
-                    assert list(parents[-1]).count(parent) == 1
-                    return parent, list(parents[-1]).index(parent), parents, None
-
-    return locals()
-
-
-getETreeModule = moduleFactoryFactory(getETreeBuilder)
diff --git a/src/pip/_vendor/html5lib/treewalkers/etree_lxml.py b/src/pip/_vendor/html5lib/treewalkers/etree_lxml.py
deleted file mode 100644
index c56af390f..000000000
--- a/src/pip/_vendor/html5lib/treewalkers/etree_lxml.py
+++ /dev/null
@@ -1,215 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-from pip._vendor.six import text_type
-
-from collections import OrderedDict
-
-from lxml import etree
-from ..treebuilders.etree import tag_regexp
-
-from . import base
-
-from .. import _ihatexml
-
-
-def ensure_str(s):
-    if s is None:
-        return None
-    elif isinstance(s, text_type):
-        return s
-    else:
-        return s.decode("ascii", "strict")
-
-
-class Root(object):
-    def __init__(self, et):
-        self.elementtree = et
-        self.children = []
-
-        try:
-            if et.docinfo.internalDTD:
-                self.children.append(Doctype(self,
-                                             ensure_str(et.docinfo.root_name),
-                                             ensure_str(et.docinfo.public_id),
-                                             ensure_str(et.docinfo.system_url)))
-        except AttributeError:
-            pass
-
-        try:
-            node = et.getroot()
-        except AttributeError:
-            node = et
-
-        while node.getprevious() is not None:
-            node = node.getprevious()
-        while node is not None:
-            self.children.append(node)
-            node = node.getnext()
-
-        self.text = None
-        self.tail = None
-
-    def __getitem__(self, key):
-        return self.children[key]
-
-    def getnext(self):
-        return None
-
-    def __len__(self):
-        return 1
-
-
-class Doctype(object):
-    def __init__(self, root_node, name, public_id, system_id):
-        self.root_node = root_node
-        self.name = name
-        self.public_id = public_id
-        self.system_id = system_id
-
-        self.text = None
-        self.tail = None
-
-    def getnext(self):
-        return self.root_node.children[1]
-
-
-class FragmentRoot(Root):
-    def __init__(self, children):
-        self.children = [FragmentWrapper(self, child) for child in children]
-        self.text = self.tail = None
-
-    def getnext(self):
-        return None
-
-
-class FragmentWrapper(object):
-    def __init__(self, fragment_root, obj):
-        self.root_node = fragment_root
-        self.obj = obj
-        if hasattr(self.obj, 'text'):
-            self.text = ensure_str(self.obj.text)
-        else:
-            self.text = None
-        if hasattr(self.obj, 'tail'):
-            self.tail = ensure_str(self.obj.tail)
-        else:
-            self.tail = None
-
-    def __getattr__(self, name):
-        return getattr(self.obj, name)
-
-    def getnext(self):
-        siblings = self.root_node.children
-        idx = siblings.index(self)
-        if idx < len(siblings) - 1:
-            return siblings[idx + 1]
-        else:
-            return None
-
-    def __getitem__(self, key):
-        return self.obj[key]
-
-    def __bool__(self):
-        return bool(self.obj)
-
-    def getparent(self):
-        return None
-
-    def __str__(self):
-        return str(self.obj)
-
-    def __unicode__(self):
-        return str(self.obj)
-
-    def __len__(self):
-        return len(self.obj)
-
-
-class TreeWalker(base.NonRecursiveTreeWalker):
-    def __init__(self, tree):
-        # pylint:disable=redefined-variable-type
-        if isinstance(tree, list):
-            self.fragmentChildren = set(tree)
-            tree = FragmentRoot(tree)
-        else:
-            self.fragmentChildren = set()
-            tree = Root(tree)
-        base.NonRecursiveTreeWalker.__init__(self, tree)
-        self.filter = _ihatexml.InfosetFilter()
-
-    def getNodeDetails(self, node):
-        if isinstance(node, tuple):  # Text node
-            node, key = node
-            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
-            return base.TEXT, ensure_str(getattr(node, key))
-
-        elif isinstance(node, Root):
-            return (base.DOCUMENT,)
-
-        elif isinstance(node, Doctype):
-            return base.DOCTYPE, node.name, node.public_id, node.system_id
-
-        elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
-            return base.TEXT, ensure_str(node.obj)
-
-        elif node.tag == etree.Comment:
-            return base.COMMENT, ensure_str(node.text)
-
-        elif node.tag == etree.Entity:
-            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;
-
-        else:
-            # This is assumed to be an ordinary element
-            match = tag_regexp.match(ensure_str(node.tag))
-            if match:
-                namespace, tag = match.groups()
-            else:
-                namespace = None
-                tag = ensure_str(node.tag)
-            attrs = OrderedDict()
-            for name, value in list(node.attrib.items()):
-                name = ensure_str(name)
-                value = ensure_str(value)
-                match = tag_regexp.match(name)
-                if match:
-                    attrs[(match.group(1), match.group(2))] = value
-                else:
-                    attrs[(None, name)] = value
-            return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
-                    attrs, len(node) > 0 or node.text)
-
-    def getFirstChild(self, node):
-        assert not isinstance(node, tuple), "Text nodes have no children"
-
-        assert len(node) or node.text, "Node has no children"
-        if node.text:
-            return (node, "text")
-        else:
-            return node[0]
-
-    def getNextSibling(self, node):
-        if isinstance(node, tuple):  # Text node
-            node, key = node
-            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
-            if key == "text":
-                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
-                # because node[0] might evaluate to False if it has no child element
-                if len(node):
-                    return node[0]
-                else:
-                    return None
-            else:  # tail
-                return node.getnext()
-
-        return (node, "tail") if node.tail else node.getnext()
-
-    def getParentNode(self, node):
-        if isinstance(node, tuple):  # Text node
-            node, key = node
-            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
-            if key == "text":
-                return node
-            # else: fallback to "normal" processing
-        elif node in self.fragmentChildren:
-            return None
-
-        return node.getparent()
diff --git a/src/pip/_vendor/html5lib/treewalkers/genshi.py b/src/pip/_vendor/html5lib/treewalkers/genshi.py
deleted file mode 100644
index 7483be27d..000000000
--- a/src/pip/_vendor/html5lib/treewalkers/genshi.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from genshi.core import QName
-from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
-from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
-
-from . import base
-
-from ..constants import voidElements, namespaces
-
-
-class TreeWalker(base.TreeWalker):
-    def __iter__(self):
-        # Buffer the events so we can pass in the following one
-        previous = None
-        for event in self.tree:
-            if previous is not None:
-                for token in self.tokens(previous, event):
-                    yield token
-            previous = event
-
-        # Don't forget the final event!
-        if previous is not None:
-            for token in self.tokens(previous, None):
-                yield token
-
-    def tokens(self, event, next):
-        kind, data, _ = event
-        if kind == START:
-            tag, attribs = data
-            name = tag.localname
-            namespace = tag.namespace
-            converted_attribs = {}
-            for k, v in attribs:
-                if isinstance(k, QName):
-                    converted_attribs[(k.namespace, k.localname)] = v
-                else:
-                    converted_attribs[(None, k)] = v
-
-            if namespace == namespaces["html"] and name in voidElements:
-                for token in self.emptyTag(namespace, name, converted_attribs,
-                                           not next or next[0] != END or
-                                           next[1] != tag):
-                    yield token
-            else:
-                yield self.startTag(namespace, name, converted_attribs)
-
-        elif kind == END:
-            name = data.localname
-            namespace = data.namespace
-            if namespace != namespaces["html"] or name not in voidElements:
-                yield self.endTag(namespace, name)
-
-        elif kind == COMMENT:
-            yield self.comment(data)
-
-        elif kind == TEXT:
-            for token in self.text(data):
-                yield token
-
-        elif kind == DOCTYPE:
-            yield self.doctype(*data)
-
-        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
-                      START_CDATA, END_CDATA, PI):
-            pass
-
-        else:
-            yield self.unknown(kind)
diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt
index 345b1f2c6..802013b6a 100644
--- a/src/pip/_vendor/vendor.txt
+++ b/src/pip/_vendor/vendor.txt
@@ -2,7 +2,6 @@ CacheControl==0.12.11  # Make sure to update the license in pyproject.toml for t
 colorama==0.4.4
 distlib==0.3.3
 distro==1.7.0
-html5lib==1.1
 msgpack==1.0.3
 packaging==21.3
 pep517==0.12.0
author	Stéphane Bidoul <stephane.bidoul@gmail.com>	2022-07-14 19:41:29 +0200
committer	Stéphane Bidoul <stephane.bidoul@gmail.com>	2022-07-16 19:37:11 +0200
commit	d3a318fe59484bcf6affbb0aa1833405aaa24a28 (patch)
tree	935c7a5d785540010d0b187dd43d76c9eb9a0377
parent	bb2a3d741006c180e3878d0843e0b0ebf3521f52 (diff)
download	pip-d3a318fe59484bcf6affbb0aa1833405aaa24a28.tar.gz