summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2016-07-24 09:58:51 +0200
committer: Stefan Behnel <stefan_ml@behnel.de> 2016-07-24 09:58:51 +0200
commit: d3b73e561628998a8738b23268f4f8bf5ca98706 (patch)
tree: 2b694fa787af8096c98d00ee51a17590ab05e9ad
parent: 0a43d0b52ea35e33564b4e08b3e50f4f890f4758 (diff)
download: python-lxml-d3b73e561628998a8738b23268f4f8bf5ca98706.tar.gz
make the "inline_style" option in Cleaner default to (but independent of) the "style" option
-rw-r--r-- src/lxml/html/clean.py 14
1 files changed, 9 insertions, 5 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index 47a32749..f0da1cd6 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -95,6 +95,7 @@ _find_external_links = etree.XPath(
"descendant-or-self::x:a[normalize-space(@href) and substring(normalize-space(@href),1,1) != '#']"),
namespaces={'x':XHTML_NAMESPACE})
+
class Cleaner(object):
"""
Instances cleans the document of each of the possible offending
@@ -115,7 +116,7 @@ class Cleaner(object):
Removes any style tags.
``inline_style``
- Removes any style attributes.
+ Removes any style attributes. Defaults to the value of the ``style`` option.
``links``:
Removes any ``<link>`` tags
@@ -194,7 +195,7 @@ class Cleaner(object):
javascript = True
comments = True
style = False
- inline_style = False
+ inline_style = None
links = True
meta = True
page_structure = True
@@ -219,6 +220,8 @@ class Cleaner(object):
raise TypeError(
"Unknown parameter: %s=%r" % (name, value))
setattr(self, name, value)
+ if self.inline_style is None and 'inline_style' not in kw:
+ self.inline_style = self.style
# Used to lookup the primary URL for a given tag that is up for
# removal:
@@ -284,9 +287,9 @@ class Cleaner(object):
del attrib[aname]
doc.rewrite_links(self._remove_javascript_link,
resolve_base_href=False)
- if not self.style:
- # If we're deleting style then we don't have to remove JS links
- # from styles, otherwise...
+ # If we're deleting style then we don't have to remove JS links
+ # from styles, otherwise...
+ if not self.inline_style:
for el in _find_styled_elements(doc):
old = el.get('style')
new = _css_javascript_re.sub('', old)
@@ -296,6 +299,7 @@ class Cleaner(object):
del el.attrib['style']
elif new != old:
el.set('style', new)
+ if not self.style:
for el in list(doc.iter('style')):
if el.get('type', '').lower().strip() == 'text/javascript':
el.drop_tree()