summary | refs | log | tree | commit | diff
path: root/src/lxml/html/diff.py
diff options
context:
space:
mode:
author: ianb <none@none>, 2008-12-10 20:50:29 +0100
committer: ianb <none@none>, 2008-12-10 20:50:29 +0100
commit: 5267ed1da607864692c2223f4454eb49e90ce67d (patch)
tree: cd1ef0dbfdfb19cae61eec5fe0621bdced02ae41 /src/lxml/html/diff.py
parent: 01a142e57d8f93f4de14756de43ff8d9b51a0595 (diff)
download: python-lxml-5267ed1da607864692c2223f4454eb49e90ce67d.tar.gz
[svn r4032] Small cleanups to lxml.html.diff.
Export lxml.html.parse --HG-- branch : trunk
Diffstat (limited to 'src/lxml/html/diff.py')
-rw-r--r-- src/lxml/html/diff.py | 8
1 file changed, 6 insertions, 2 deletions
diff --git a/src/lxml/html/diff.py b/src/lxml/html/diff.py
index 92a1e1b6..bb495e49 100644
--- a/src/lxml/html/diff.py
+++ b/src/lxml/html/diff.py
@@ -1,6 +1,7 @@
import difflib
from lxml import etree
from lxml.html import fragment_fromstring
+from lxml.etree import _Element
import cgi
import re
@@ -507,7 +508,7 @@ class href_token(token):
hide_when_equal = True
def html(self):
- return 'Link: %s' % self
+ return ' Link: %s' % self
def tokenize(html, include_hrefs=True):
"""
@@ -524,7 +525,10 @@ def tokenize(html, include_hrefs=True):
If include_hrefs is true, then the href attribute of <a> tags is
included as a special kind of diffable token."""
- body_el = parse_html(html, cleanup=True)
+ if isinstance(html, _Element):
+ body_el = html
+ else:
+ body_el = parse_html(html, cleanup=True)
# Then we split the document into text chunks for each tag, word, and end tag:
chunks = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
# Finally re-joining them into token objects: