summary | refs | log | tree | commit | diff
path: root/src/lxml/html/diff.py
diff options
context:
space:
mode:
author: ianb <none@none> 2008-03-26 17:54:46 +0100
committer: ianb <none@none> 2008-03-26 17:54:46 +0100
commit: bb557dcefacccd583b174b87b5f268797449ed2f (patch)
tree: 8ab1316409c52a9eaefae1adc52dfaee8bb5f37d /src/lxml/html/diff.py
parent: 08c17a97639f2a7e12108276fdc983081dbf2c69 (diff)
download: python-lxml-bb557dcefacccd583b174b87b5f268797449ed2f.tar.gz
[svn r3460] Fix empty tags (e.g., <br>) in diffs.
--HG-- branch : trunk
Diffstat (limited to 'src/lxml/html/diff.py')
-rw-r--r-- src/lxml/html/diff.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/lxml/html/diff.py b/src/lxml/html/diff.py
index 448faffb..3acf375e 100644
--- a/src/lxml/html/diff.py
+++ b/src/lxml/html/diff.py
@@ -139,6 +139,8 @@ def markup_serialize_tokens(tokens, markup_func):
############################################################
def htmldiff(old_html, new_html):
+ ## FIXME: this should take parsed documents too, and use their body
+ ## or other content.
""" Do a diff of the old and new document. The documents are HTML
*fragments* (str/UTF8 or unicode), they are not complete documents
(i.e., no <html> tag).
@@ -310,8 +312,6 @@ def split_unbalanced(chunks):
endtag = chunk[1] == '/'
name = chunk.split()[0].strip('<>/')
if name in empty_tags:
- assert not endtag, (
- "Empty tag %r should have no end tag" % chunk)
balanced.append(chunk)
continue
if endtag:
@@ -669,7 +669,7 @@ def flatten_el(el, include_hrefs, skip_tag=False):
yield ('img', el.attrib['src'], start_tag(el))
else:
yield start_tag(el)
- if el.tag in empty_tags and not el.text and not len(el):
+ if el.tag in empty_tags and not el.text and not len(el) and not el.tail:
return
start_words = split_words(el.text)
for word in start_words: