summaryrefslogtreecommitdiff
path: root/src/lxml/html/clean.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/lxml/html/clean.py')
-rw-r--r--src/lxml/html/clean.py17
1 files changed, 15 insertions, 2 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index aa9fc57f..da1f8706 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -432,6 +432,12 @@ class Cleaner(object):
return False
def allow_element(self, el):
+ """
+ Decide whether an element is configured to be accepted or rejected.
+
+ :param el: an element.
+ :return: true to accept the element or false to reject/discard it.
+ """
if el.tag not in self._tag_link_attrs:
return False
attr = self._tag_link_attrs[el.tag]
@@ -450,8 +456,15 @@ class Cleaner(object):
return self.allow_embedded_url(el, url)
def allow_embedded_url(self, el, url):
- if (self.whitelist_tags is not None
- and el.tag not in self.whitelist_tags):
+ """
+ Decide whether a URL that was found in an element's attributes or text
+ is configured to be accepted or rejected.
+
+ :param el: an element.
+ :param url: a URL found on the element.
+ :return: true to accept the URL or false to reject it.
+ """
+ if self.whitelist_tags is not None and el.tag not in self.whitelist_tags:
return False
scheme, netloc, path, query, fragment = urlsplit(url)
netloc = netloc.lower().split(':', 1)[0]