summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/lxml/html/clean.py5
-rw-r--r--src/lxml/html/tests/test_clean.txt6
2 files changed, 6 insertions, 5 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index adc3f450..11da2958 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -8,9 +8,10 @@ import re
import copy
try:
from urlparse import urlsplit
+ from urllib import unquote_plus
except ImportError:
# Python 3
- from urllib.parse import urlsplit
+ from urllib.parse import urlsplit, unquote_plus
from lxml import etree
from lxml.html import defs
from lxml.html import fromstring, XHTML_NAMESPACE
@@ -482,7 +483,7 @@ class Cleaner(object):
def _remove_javascript_link(self, link):
# links like "j a v a s c r i p t:" might be interpreted in IE
- new = _substitute_whitespace('', link)
+ new = _substitute_whitespace('', unquote_plus(link))
if _is_javascript_scheme(new):
# FIXME: should this be None to delete?
return ''
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index c78ab4f1..2824f64c 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -18,7 +18,7 @@
... <body onload="evil_function()">
... <!-- I am interpreted for EVIL! -->
... <a href="javascript:evil_function()">a link</a>
-... <a href="j\x01a\x02v\x03a\x04s\x05c\x06r\x07i\x0Ep t:evil_function()">a control char link</a>
+... <a href="j\x01a\x02v\x03a\x04s\x05c\x06r\x07i\x0Ep t%20:evil_function()">a control char link</a>
... <a href="data:text/html;base64,PHNjcmlwdD5hbGVydCgidGVzdCIpOzwvc2NyaXB0Pg==">data</a>
... <a href="#" onclick="evil_function()">another link</a>
... <p onclick="evil_function()">a paragraph</p>
@@ -51,7 +51,7 @@
<body onload="evil_function()">
<!-- I am interpreted for EVIL! -->
<a href="javascript:evil_function()">a link</a>
- <a href="javascrip t:evil_function()">a control char link</a>
+ <a href="javascrip t%20:evil_function()">a control char link</a>
<a href="data:text/html;base64,PHNjcmlwdD5hbGVydCgidGVzdCIpOzwvc2NyaXB0Pg==">data</a>
<a href="#" onclick="evil_function()">another link</a>
<p onclick="evil_function()">a paragraph</p>
@@ -84,7 +84,7 @@
<body onload="evil_function()">
<!-- I am interpreted for EVIL! -->
<a href="javascript:evil_function()">a link</a>
- <a href="javascrip%20t:evil_function()">a control char link</a>
+ <a href="javascrip%20t%20:evil_function()">a control char link</a>
<a href="data:text/html;base64,PHNjcmlwdD5hbGVydCgidGVzdCIpOzwvc2NyaXB0Pg==">data</a>
<a href="#" onclick="evil_function()">another link</a>
<p onclick="evil_function()">a paragraph</p>