summaryrefslogtreecommitdiff
path: root/openid
diff options
context:
space:
mode:
author Vlastimil Zíma <vlastimil.zima@nic.cz> 2018-05-02 17:38:26 +0200
committer Vlastimil Zíma <vlastimil.zima@nic.cz> 2018-05-02 17:38:26 +0200
commit cf7908f1d8bfcb305dd547baf3d944807e0936a5 (patch)
tree e965fed6f884ddcee0d2815f445852021577eb4f /openid
parent 44293807fecb3eedf26a526dbb5961f1efa12642 (diff)
download openid-cf7908f1d8bfcb305dd547baf3d944807e0936a5.tar.gz
Refactor consumer HTML parse
Diffstat (limited to 'openid')
-rw-r--r-- openid/consumer/discover.py 30
-rw-r--r-- openid/consumer/html_parse.py 263
-rw-r--r-- openid/test/linkparse.txt 584
-rw-r--r-- openid/test/test_htmldiscover.py 21
-rw-r--r-- openid/test/test_linkparse.py 88
5 files changed, 33 insertions, 953 deletions
diff --git a/openid/consumer/discover.py b/openid/consumer/discover.py
index b9bc30e..0824af4 100644
--- a/openid/consumer/discover.py
+++ b/openid/consumer/discover.py
@@ -16,8 +16,10 @@ __all__ = [
import logging
import urlparse
+from lxml.etree import LxmlError
+from lxml.html import document_fromstring
+
from openid import fetchers, urinorm
-from openid.consumer import html_parse
from openid.message import OPENID1_NS as OPENID_1_0_MESSAGE_NS, OPENID2_NS as OPENID_2_0_MESSAGE_NS
from openid.yadis import filters, xri, xrires
from openid.yadis.discover import DiscoveryFailure, discover as yadisDiscover
@@ -32,6 +34,8 @@ OPENID_2_0_TYPE = 'http://specs.openid.net/auth/2.0/signon'
OPENID_1_1_TYPE = 'http://openid.net/signon/1.1'
OPENID_1_0_TYPE = 'http://openid.net/signon/1.0'
+LINK_REL_XPATH = "/html/head/link[contains(concat(' ', normalize-space(@rel), ' '), ' {} ')]"
+
class OpenIDServiceEndpoint(object):
"""Object representing an OpenID service endpoint.
@@ -152,19 +156,29 @@ class OpenIDServiceEndpoint(object):
(OPENID_2_0_TYPE, 'openid2.provider', 'openid2.local_id'),
(OPENID_1_1_TYPE, 'openid.server', 'openid.delegate'),
]
-
- link_attrs = html_parse.parseLinkAttrs(html)
services = []
+
+ try:
+ parsed_html = document_fromstring(html)
+ except LxmlError:
+ # It's a dumb function. Return empty results in case of an error.
+ return []
for type_uri, op_endpoint_rel, local_id_rel in discovery_types:
- op_endpoint_url = html_parse.findFirstHref(
- link_attrs, op_endpoint_rel)
- if op_endpoint_url is None:
+ op_links = parsed_html.xpath(LINK_REL_XPATH.format(op_endpoint_rel))
+ if not op_links:
+ continue
+ op_endpoint_url = op_links[0].get('href')
+ if not op_endpoint_url:
continue
+ local_id_links = parsed_html.xpath(LINK_REL_XPATH.format(local_id_rel))
+ local_id = None
+ if local_id_links:
+ local_id = local_id_links[0].get('href')
+
service = cls()
service.claimed_id = uri
- service.local_id = html_parse.findFirstHref(
- link_attrs, local_id_rel)
+ service.local_id = local_id
service.server_url = op_endpoint_url
service.type_uris = [type_uri]
diff --git a/openid/consumer/html_parse.py b/openid/consumer/html_parse.py
deleted file mode 100644
index 3c2a025..0000000
--- a/openid/consumer/html_parse.py
+++ /dev/null
@@ -1,263 +0,0 @@
-"""
-This module implements a VERY limited parser that finds <link> tags in
-the head of HTML or XHTML documents and parses out their attributes
-according to the OpenID spec. It is a liberal parser, but it requires
-these things from the data in order to work:
-
- - There must be an open <html> tag
-
- - There must be an open <head> tag inside of the <html> tag
-
- - Only <link>s that are found inside of the <head> tag are parsed
- (this is by design)
-
- - The parser follows the OpenID specification in resolving the
- attributes of the link tags. This means that the attributes DO NOT
- get resolved as they would by an XML or HTML parser. In particular,
- only certain entities get replaced, and href attributes do not get
- resolved relative to a base URL.
-
-From http://openid.net/specs.bml#linkrel:
-
- - The openid.server URL MUST be an absolute URL. OpenID consumers
- MUST NOT attempt to resolve relative URLs.
-
- - The openid.server URL MUST NOT include entities other than &amp;,
- &lt;, &gt;, and &quot;.
-
-The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds of
-quoting are allowed for attributes.
-
-The parser deals with invalid markup in these ways:
-
- - Tag names are not case-sensitive
-
- - The <html> tag is accepted even when it is not at the top level
-
- - The <head> tag is accepted even when it is not a direct child of
- the <html> tag, but a <html> tag must be an ancestor of the <head>
- tag
-
- - <link> tags are accepted even when they are not direct children of
- the <head> tag, but a <head> tag must be an ancestor of the <link>
- tag
-
- - If there is no closing tag for an open <html> or <head> tag, the
- remainder of the document is viewed as being inside of the tag. If
- there is no closing tag for a <link> tag, the link tag is treated
- as a short tag. Exceptions to this rule are that <html> closes
- <html> and <body> or <head> closes <head>
-
- - Attributes of the <link> tag are not required to be quoted.
-
- - In the case of duplicated attribute names, the attribute coming
- last in the tag will be the value returned.
-
- - Any text that does not parse as an attribute within a link tag will
- be ignored. (e.g. <link pumpkin rel='openid.server' /> will ignore
- pumpkin)
-
- - If there is more than one <html> or <head> tag, the parser only
- looks inside of the first one.
-
- - The contents of closed <script> tags are ignored entirely. An unclosed
- <script> tag is itself ignored, so the markup after it is still parsed.
-
- - Any other invalid markup is ignored, including unclosed SGML
- comments and unclosed <![CDATA[blocks.
-"""
-
-__all__ = ['parseLinkAttrs']
-
-import re
-from functools import partial
-
-flags = (
- # Match newlines with '.'
- re.DOTALL |
- re.IGNORECASE |
- # Allow comments and whitespace in patterns
- re.VERBOSE |
- # Make \b respect Unicode word boundaries
- re.UNICODE
-)
-
-# Stuff to remove before we start looking for tags
-removed_re = re.compile(r'''
- # Comments
- <!--.*?-->
-
- # CDATA blocks
-| <!\[CDATA\[.*?\]\]>
-
- # script blocks
-| <script\b
-
- # make sure script is not an XML namespace
- (?!:)
-
- [^>]*>.*?</script>
-
-''', flags)
-
-tag_expr = r'''
-# Starts with the tag name at a word boundary, where the tag name is
-# not a namespace
-<%(tag_name)s\b(?!:)
-
-# All of the stuff up to a ">", hopefully attributes.
-(?P<attrs>[^>]*?)
-
-(?: # Match a short tag
- />
-
-| # Match a full tag
- >
-
- (?P<contents>.*?)
-
- # Closed by
- (?: # One of the specified close tags
- </?%(closers)s\s*>
-
- # End of the string
- | \Z
-
- )
-
-)
-'''
-
-
-def tagMatcher(tag_name, *close_tags):
- if close_tags:
- options = '|'.join((tag_name,) + close_tags)
- closers = '(?:%s)' % (options,)
- else:
- closers = tag_name
-
- expr = tag_expr % locals()
- return re.compile(expr, flags)
-
-
-# Must contain at least an open html and an open head tag
-html_find = tagMatcher('html')
-head_find = tagMatcher('head', 'body')
-link_find = re.compile(r'<link\b(?!:)', flags)
-
-attr_find = re.compile(r'''
-# Must start with a sequence of word-characters, followed by an equals sign
-(?P<attr_name>\w+)=
-
-# Then either a quoted or unquoted attribute
-(?:
-
- # Match everything that\'s between matching quote marks
- (?P<qopen>["\'])(?P<q_val>.*?)(?P=qopen)
-|
-
- # If the value is not quoted, match up to whitespace
- (?P<unq_val>(?:[^\s<>/]|/(?!>))+)
-)
-
-|
-
-(?P<end_link>[<>])
-''', flags)
-
-# Entity replacement:
-replacements = {
- 'amp': '&',
- 'lt': '<',
- 'gt': '>',
- 'quot': '"',
-}
-
-ent_replace = re.compile(r'&(%s);' % '|'.join(replacements.keys()))
-
-
-def replaceEnt(mo):
- "Replace the entities that are specified by OpenID"
- return replacements.get(mo.group(1), mo.group())
-
-
-def parseLinkAttrs(html):
- """Find all link tags in a string representing a HTML document and
- return a list of their attributes.
-
- @param html: the text to parse
- @type html: str or unicode
-
- @return: A list of dictionaries of attributes, one for each link tag
- @rtype: [[(type(html), type(html))]]
- """
- stripped = removed_re.sub('', html)
- html_mo = html_find.search(stripped)
- if html_mo is None or html_mo.start('contents') == -1:
- return []
-
- start, end = html_mo.span('contents')
- head_mo = head_find.search(stripped, start, end)
- if head_mo is None or head_mo.start('contents') == -1:
- return []
-
- start, end = head_mo.span('contents')
- link_mos = link_find.finditer(stripped, head_mo.start(), head_mo.end())
-
- matches = []
- for link_mo in link_mos:
- start = link_mo.start() + 5
- link_attrs = {}
- for attr_mo in attr_find.finditer(stripped, start):
- if attr_mo.lastgroup == 'end_link':
- break
-
- # Either q_val or unq_val must be present, but not both
- # unq_val is a True (non-empty) value if it is present
- attr_name, q_val, unq_val = attr_mo.group(
- 'attr_name', 'q_val', 'unq_val')
- attr_val = ent_replace.sub(replaceEnt, unq_val or q_val)
-
- link_attrs[attr_name] = attr_val
-
- matches.append(link_attrs)
-
- return matches
-
-
-def relMatches(rel_attr, target_rel):
- """Does this target_rel appear in the rel_str?"""
- # XXX: TESTME
- rels = rel_attr.strip().split()
- for rel in rels:
- rel = rel.lower()
- if rel == target_rel:
- return 1
-
- return 0
-
-
-def linkHasRel(link_attrs, target_rel):
- """Does this link have target_rel as a relationship?"""
- # XXX: TESTME
- rel_attr = link_attrs.get('rel')
- return rel_attr and relMatches(rel_attr, target_rel)
-
-
-def findLinksRel(link_attrs_list, target_rel):
- """Filter the list of link attributes on whether it has target_rel
- as a relationship."""
- # XXX: TESTME
- matchesTarget = partial(linkHasRel, target_rel=target_rel)
- return [i for i in link_attrs_list if matchesTarget(i)]
-
-
-def findFirstHref(link_attrs_list, target_rel):
- """Return the value of the href attribute for the first link tag
- in the list that has target_rel as a relationship."""
- # XXX: TESTME
- matches = findLinksRel(link_attrs_list, target_rel)
- if not matches:
- return None
- first = matches[0]
- return first.get('href')
diff --git a/openid/test/linkparse.txt b/openid/test/linkparse.txt
deleted file mode 100644
index 74c63ca..0000000
--- a/openid/test/linkparse.txt
+++ /dev/null
@@ -1,584 +0,0 @@
-Num Tests: 72
-
-OpenID link parsing test cases
-Copyright (C) 2005-2008, JanRain, Inc.
-See COPYING for license information.
-
-File format
------------
-
-All text before the first triple-newline (this chunk) should be ignored.
-
-This file may be interpreted as Latin-1 or UTF-8.
-
-Test cases are separated by three line separators (`\n\n\n'). The test
-cases consist of a headers section followed by a data block. These are
-separated by a double newline. The headers consist of the header name,
-followed by a colon, a space, the value, and a newline. There must be
-one, and only one, `Name' header for a test case. There may be zero or
-more link headers. The `Link' header consists of whitespace-separated
-attribute pairs. A link header with an empty string as a value
-indicates an empty but present link tag. The attribute pairs are `='
-separated and not quoted.
-
-Optional Links and attributes have a trailing `*'. A compliant
-implementation may produce this as output or may not. A compliant
-implementation will not produce any output that is absent from this
-file.
-
-
-Name: No link tag at all
-
-<html>
-<head>
-</head>
-</html>
-
-
-Name: Link element first
-
-<link>
-
-
-Name: Link inside HTML, not head
-
-<html>
-<link>
-
-
-Name: Link inside head, not html
-
-<head>
-<link>
-
-
-Name: Link inside html, after head
-
-<html>
-<head>
-</head>
-<link>
-
-
-Name: Link inside html, before head
-
-<html>
-<link>
-<head>
-
-
-Name: Link before html and head
-
-<link>
-<html>
-<head>
-
-
-Name: Link after html document with head
-
-<html>
-<head>
-</head>
-</html>
-<link>
-
-
-Name: Link inside html inside head, inside another html
-
-<html>
-<head>
-<html>
-<link>
-
-
-Name: Link inside html inside head
-
-<head>
-<html>
-<link>
-
-
-Name: link inside body inside head inside html
-
-<html>
-<head>
-<body>
-<link>
-
-
-Name: Link inside head inside head inside html
-
-<html>
-<head>
-<head>
-<link>
-
-
-Name: Link inside script inside head inside html
-
-<html>
-<head>
-<script>
-<link>
-</script>
-
-
-Name: Link inside comment inside head inside html
-
-<html>
-<head/>
-<link>
-
-
-Name: Link inside of head after short head
-
-<html>
-<head/>
-<head>
-<link>
-
-
-Name: Plain vanilla
-Link:
-
-<html>
-<head>
-<link>
-
-
-Name: Ignore tags in the <script:... > namespace
-Link*:
-
-<html>
-<head>
-<script:paddypan>
-<link>
-</script:paddypan>
-
-
-Name: Short link tag
-Link:
-
-<html>
-<head>
-<link/>
-
-
-Name: Spaces in the HTML tag
-Link:
-
-<html >
-<head>
-<link>
-
-
-Name: Spaces in the head tag
-Link:
-
-<html>
-<head >
-<link>
-
-
-Name: Spaces in the link tag
-Link:
-
-<html>
-<head>
-<link >
-
-
-Name: No whitespace
-Link:
-
-<html><head><link>
-
-
-Name: Closed head tag
-Link:
-
-<html>
-<head>
-<link>
-</head>
-
-
-Name: One good, one bad (after close head)
-Link:
-
-<html>
-<head>
-<link>
-</head>
-<link>
-
-
-Name: One good, one bad (after open body)
-Link:
-
-<html>
-<head>
-<link>
-<body>
-<link>
-
-
-Name: ill formed (missing close head)
-Link:
-
-<html>
-<head>
-<link>
-</html>
-
-
-Name: Ill formed (no close head, link after </html>)
-Link:
-
-<html>
-<head>
-<link>
-</html>
-<link>
-
-
-Name: Ignore random tags inside of html
-Link:
-
-<html>
-<delicata>
-<head>
-<title>
-<link>
-
-
-Name: case-folding
-Link*:
-
-<HtMl>
-<hEaD>
-<LiNk>
-
-
-Name: unexpected tags
-Link:
-
-<butternut>
-<html>
-<summer>
-<head>
-<turban>
-<link>
-
-
-Name: un-closed script tags
-Link*:
-
-<html>
-<head>
-<script>
-<link>
-
-
-Name: un-closed script tags (no whitespace)
-Link*:
-
-<html><head><script><link>
-
-
-Name: un-closed comment
-Link*:
-
-<html>
-<head>
-<!--
-<link>
-
-
-Name: un-closed CDATA
-Link*:
-
-<html>
-<head>
-<![CDATA[
-<link>
-
-
-Name: cdata-like
-Link*:
-
-<html>
-<head>
-<![ACORN[
-<link>
-]]>
-
-
-Name: comment close only
-Link:
-
-<html>
-<head>
-<link>
--->
-
-
-Name: Vanilla, two links
-Link:
-Link:
-
-<html>
-<head>
-<link>
-<link>
-
-
-Name: extra tag, two links
-Link:
-Link:
-
-<html>
-<gold nugget>
-<head>
-<link>
-<link>
-
-
-Name: case-fold, body ends, two links
-Link:
-Link*:
-
-<html>
-<head>
-<link>
-<LiNk>
-<body>
-<link>
-
-
-Name: simple, non-quoted rel
-Link: rel=openid.server
-
-<html><head><link rel=openid.server>
-
-
-Name: short tag has rel
-Link: rel=openid.server
-
-<html><head><link rel=openid.server/>
-
-
-Name: short tag w/space has rel
-Link: rel=openid.server
-
-<html><head><link rel=openid.server />
-
-
-Name: extra non-attribute, has rel
-Link: rel=openid.server
-
-<html><head><link hubbard rel=openid.server>
-
-
-Name: non-attr, has rel, short
-Link: rel=openid.server
-
-<html><head><link hubbard rel=openid.server/>
-
-
-Name: non-attr, has rel, short, space
-Link: rel=openid.server
-
-<html><head><link hubbard rel=openid.server />
-
-
-Name: misplaced slash has rel
-Link: rel=openid.server
-
-<html><head><link / rel=openid.server>
-
-
-Name: quoted rel
-Link: rel=openid.server
-
-<html><head><link rel="openid.server">
-
-
-Name: single-quoted rel
-Link: rel=openid.server
-
-<html><head><link rel='openid.server'>
-
-
-Name: two links w/ rel
-Link: x=y
-Link: a=b
-
-<html><head><link x=y><link a=b>
-
-
-Name: non-entity
-Link: x=&y
-
-<html><head><link x=&y>
-
-
-Name: quoted non-entity
-Link: x=&y
-
-<html><head><link x="&y">
-
-
-Name: quoted entity
-Link: x=&
-
-<html><head><link x="&amp;">
-
-
-Name: entity not processed
-Link: x=&#26;
-
-<html><head><link x="&#26;">
-
-
-Name: &lt;
-Link: x=<
-
-<html><head><link x="&lt;">
-
-
-Name: &gt;
-Link: x=>
-
-<html><head><link x="&gt;">
-
-
-Name: &quot;
-Link: x="
-
-<html><head><link x="&quot;">
-
-
-Name: &amp;&quot;
-Link: x=&"
-
-<html><head><link x="&amp;&quot;">
-
-
-Name: mixed entity and non-entity
-Link: x=&"&hellip;>
-
-<html><head><link x="&amp;&quot;&hellip;&gt;">
-
-
-Name: mixed entity and non-entity (w/normal chars)
-Link: x=x&"&hellip;>x
-
-<html><head><link x="x&amp;&quot;&hellip;&gt;x">
-
-
-Name: broken tags
-Link*: x=y
-
-<html><head><link x=y<>
-
-
-Name: missing close pointy
-Link*: x=y
-Link: z=y
-
-<html><head><link x=y<link z=y />
-
-
-Name: missing attribute value
-Link: x=y y*=
-Link: x=y
-
-<html><head><link x=y y=><link x=y />
-
-
-Name: Missing close pointy (no following)
-Link*: x=y
-
-<html><head><link x=y
-
-
-Name: Should be quoted
-Link*: x=<
-
-<html><head><link x="<">
-
-
-Name: Should be quoted (2)
-Link*: x=>
-
-<html><head><link x=">">
-
-
-Name: Repeated attribute
-Link: x=y
-
-<html><head><link x=z x=y>
-
-
-Name: Repeated attribute (2)
-Link: x=y
-
-<html><head><link x=y x=y>
-
-
-Name: Two attributes
-Link: x=y y=z
-
-<html><head><link x=y y=z>
-
-
-Name: Well-formed link rel="openid.server"
-Link: rel=openid.server href=http://www.myopenid.com/server
-
-<html>
- <head>
- <link rel="openid.server"
- href="http://www.myopenid.com/server" />
- </head>
-</html>
-
-
-Name: Well-formed link rel="openid.server" and "openid.delegate"
-Link: rel=openid.server href=http://www.myopenid.com/server
-Link: rel=openid.delegate href=http://example.myopenid.com/
-
-<html><head><link rel="openid.server"
- href="http://www.myopenid.com/server" />
- <link rel="openid.delegate" href="http://example.myopenid.com/" />
-</head></html>
-
-
-Name: from brian's livejournal page
-Link: rel=stylesheet href=http://www.livejournal.com/~serotta/res/319998/stylesheet?1130478711 type=text/css
-Link: rel=openid.server href=http://www.livejournal.com/openid/server.bml
-
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
- <head>
- <link rel="stylesheet"
- href="http://www.livejournal.com/~serotta/res/319998/stylesheet?1130478711"
- type="text/css" />
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
- <meta name="foaf:maker"
- content="foaf:mbox_sha1sum '12f8abdacb5b1a806711e23249da592c0d316260'" />
- <meta name="robots" content="noindex, nofollow, noarchive" />
- <meta name="googlebot" content="nosnippet" />
- <link rel="openid.server"
- href="http://www.livejournal.com/openid/server.bml" />
- <title>Brian</title>
- </head>
-
-
-Name: non-ascii (Latin-1 or UTF8)
-Link: x=®
-
-<html><head><link x="®">
-
-
diff --git a/openid/test/test_htmldiscover.py b/openid/test/test_htmldiscover.py
index 65b036f..b4caeb3 100644
--- a/openid/test/test_htmldiscover.py
+++ b/openid/test/test_htmldiscover.py
@@ -3,14 +3,15 @@ import unittest
from openid.consumer.discover import OpenIDServiceEndpoint
-class BadLinksTestCase(unittest.TestCase):
- cases = [
- '',
- "http://not.in.a.link.tag/",
- '<link rel="openid.server" href="not.in.html.or.head" />',
- ]
+class TestFromHTML(unittest.TestCase):
+ """Test `OpenIDServiceEndpoint.fromHTML`."""
- def test_from_html(self):
- for html in self.cases:
- actual = OpenIDServiceEndpoint.fromHTML('http://unused.url/', html)
- self.assertEqual(actual, [])
+ def test_empty(self):
+ self.assertEqual(OpenIDServiceEndpoint.fromHTML('http://example.url/', ''), [])
+
+ def test_invalid_html(self):
+ self.assertEqual(OpenIDServiceEndpoint.fromHTML('http://example.url/', "http://not.in.a.link.tag/"), [])
+
+ def test_no_op_url(self):
+ html = '<html><head><link rel="openid.server"></head></html>'
+ self.assertEqual(OpenIDServiceEndpoint.fromHTML('http://example.url/', html), [])
diff --git a/openid/test/test_linkparse.py b/openid/test/test_linkparse.py
deleted file mode 100644
index 077caaf..0000000
--- a/openid/test/test_linkparse.py
+++ /dev/null
@@ -1,88 +0,0 @@
-"""Test `openid.consumer.html_parse` module."""
-import os.path
-import unittest
-
-from openid.consumer.html_parse import parseLinkAttrs
-
-
-def parseLink(line):
- parts = line.split()
- optional = parts[0] == 'Link*:'
- assert optional or parts[0] == 'Link:'
-
- attrs = {}
- for attr in parts[1:]:
- k, v = attr.split('=', 1)
- if k[-1] == '*':
- attr_optional = 1
- k = k[:-1]
- else:
- attr_optional = 0
-
- attrs[k] = (attr_optional, v)
-
- return (optional, attrs)
-
-
-def parseCase(s):
- header, markup = s.split('\n\n', 1)
- lines = header.split('\n')
- name = lines.pop(0)
- assert name.startswith('Name: ')
- desc = name[6:]
- return desc, markup, [parseLink(l) for l in lines]
-
-
-def parseTests(s):
- tests = []
-
- cases = s.split('\n\n\n')
- header = cases.pop(0)
- tests_line, _ = header.split('\n', 1)
- k, v = tests_line.split(': ')
- assert k == 'Num Tests'
- num_tests = int(v)
-
- for case in cases[:-1]:
- desc, markup, links = parseCase(case)
- tests.append((desc, markup, links, case))
-
- assert len(tests) == num_tests, (len(tests), num_tests)
- return num_tests, tests
-
-
-with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'linkparse.txt')) as link_test_data_file:
- link_test_data = link_test_data_file.read().decode('utf-8')
-
-
-class LinkTest(unittest.TestCase):
- """Test `parseLinkAttrs` function."""
-
- def runTest(self):
- num_tests, test_cases = parseTests(link_test_data)
-
- for desc, case, expected, raw in test_cases:
- actual = parseLinkAttrs(case)
- i = 0
- for optional, exp_link in expected:
- if optional:
- if i >= len(actual):
- continue
-
- act_link = actual[i]
- for k, (o, v) in exp_link.items():
- if o:
- act_v = act_link.get(k)
- if act_v is None:
- continue
- else:
- act_v = act_link[k]
-
- if optional and v != act_v:
- break
-
- self.assertEqual(v, act_v)
- else:
- i += 1
-
- assert i == len(actual)