diff options
author | ianb <none@none> | 2008-03-26 17:54:46 +0100 |
---|---|---|
committer | ianb <none@none> | 2008-03-26 17:54:46 +0100 |
commit | bb557dcefacccd583b174b87b5f268797449ed2f (patch) | |
tree | 8ab1316409c52a9eaefae1adc52dfaee8bb5f37d /src/lxml/html/diff.py | |
parent | 08c17a97639f2a7e12108276fdc983081dbf2c69 (diff) | |
download | python-lxml-bb557dcefacccd583b174b87b5f268797449ed2f.tar.gz |
[svn r3460] Fix empty tags (e.g., <br>) in diffs.
--HG--
branch : trunk
Diffstat (limited to 'src/lxml/html/diff.py')
-rw-r--r-- | src/lxml/html/diff.py | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/src/lxml/html/diff.py b/src/lxml/html/diff.py
index 448faffb..3acf375e 100644
--- a/src/lxml/html/diff.py
+++ b/src/lxml/html/diff.py
@@ -139,6 +139,8 @@ def markup_serialize_tokens(tokens, markup_func):
 ############################################################
 
 def htmldiff(old_html, new_html):
+    ## FIXME: this should take parsed documents too, and use their body
+    ## or other content.
     """ Do a diff of the old and new document. The documents are HTML
     *fragments* (str/UTF8 or unicode), they are not complete documents
     (i.e., no <html> tag).
@@ -310,8 +312,6 @@ def split_unbalanced(chunks):
         endtag = chunk[1] == '/'
         name = chunk.split()[0].strip('<>/')
         if name in empty_tags:
-            assert not endtag, (
-                "Empty tag %r should have no end tag" % chunk)
             balanced.append(chunk)
             continue
         if endtag:
@@ -669,7 +669,7 @@ def flatten_el(el, include_hrefs, skip_tag=False):
             yield ('img', el.attrib['src'], start_tag(el))
         else:
             yield start_tag(el)
-    if el.tag in empty_tags and not el.text and not len(el):
+    if el.tag in empty_tags and not el.text and not len(el) and not el.tail:
         return
     start_words = split_words(el.text)
     for word in start_words: