summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2019-08-24 07:22:07 +0200
committer Stefan Behnel <stefan_ml@behnel.de> 2019-08-24 07:22:07 +0200
commit 8d56b24f5361c3203aa4eec41c3b91ecad02a78c (patch)
tree e325d91eea427e7ee9c49e2660b1f908a1662968
parent f1245276b09a46d64d5b6202d7da4791af6388ed (diff)
download python-lxml-8d56b24f5361c3203aa4eec41c3b91ecad02a78c.tar.gz
Add docstrings to Cleaner.allow_element() and Cleaner.allow_embedded_url().
-rw-r--r-- src/lxml/html/clean.py 17
1 files changed, 15 insertions, 2 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index aa9fc57f..da1f8706 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -432,6 +432,12 @@ class Cleaner(object):
return False
def allow_element(self, el):
+ """
+ Decide whether an element is configured to be accepted or rejected.
+
+ :param el: an element.
+ :return: true to accept the element or false to reject/discard it.
+ """
if el.tag not in self._tag_link_attrs:
return False
attr = self._tag_link_attrs[el.tag]
@@ -450,8 +456,15 @@ class Cleaner(object):
return self.allow_embedded_url(el, url)
def allow_embedded_url(self, el, url):
- if (self.whitelist_tags is not None
- and el.tag not in self.whitelist_tags):
+ """
+ Decide whether a URL that was found in an element's attributes or text
+ is configured to be accepted or rejected.
+
+ :param el: an element.
+ :param url: a URL found on the element.
+ :return: true to accept the URL or false to reject it.
+ """
+ if self.whitelist_tags is not None and el.tag not in self.whitelist_tags:
return False
scheme, netloc, path, query, fragment = urlsplit(url)
netloc = netloc.lower().split(':', 1)[0]