#13358: HTMLParser now calls handle_data only once for each CDATA.

author: Ezio Melotti <ezio.melotti@gmail.com> 2011-11-18 18:01:49 +0200
committer: Ezio Melotti <ezio.melotti@gmail.com> 2011-11-18 18:01:49 +0200
commit: dfa5ae03e1c6f006427d421bda6b01d346ac0e99 (patch)
tree: 1e941838af42ddb452272ada777b2c91c746b203 /Lib/html
parent: 081bb73a0a87ab32da6b757157a63a7907147ace (diff)
download: cpython-dfa5ae03e1c6f006427d421bda6b01d346ac0e99.tar.gz
1 file changed, 4 insertions, 3 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 662e85575a..dd9c2e1486 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -14,7 +14,6 @@ import re
 # Regular expressions used for parsing
 
 interesting_normal = re.compile('[&<]')
-interesting_cdata = re.compile(r'<(/|\Z)')
 incomplete = re.compile('&[a-zA-Z#]')
 
 entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
@@ -149,8 +148,8 @@ class HTMLParser(_markupbase.ParserBase):
         return self.__starttag_text
 
     def set_cdata_mode(self, elem):
-        self.interesting = interesting_cdata
         self.cdata_elem = elem.lower()
+        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
 
     def clear_cdata_mode(self):
         self.interesting = interesting_normal
@@ -168,6 +167,8 @@ class HTMLParser(_markupbase.ParserBase):
             if match:
                 j = match.start()
             else:
+                if self.cdata_elem:
+                    break
                 j = n
             if i < j: self.handle_data(rawdata[i:j])
             i = self.updatepos(i, j)
@@ -250,7 +251,7 @@ class HTMLParser(_markupbase.ParserBase):
             else:
                 assert 0, "interesting.search() lied"
         # end while
-        if end and i < n:
+        if end and i < n and not self.cdata_elem:
             self.handle_data(rawdata[i:n])
             i = self.updatepos(i, n)
         self.rawdata = rawdata[i:]
author	Ezio Melotti <ezio.melotti@gmail.com>	2011-11-18 18:01:49 +0200
committer	Ezio Melotti <ezio.melotti@gmail.com>	2011-11-18 18:01:49 +0200
commit	dfa5ae03e1c6f006427d421bda6b01d346ac0e99 (patch)
tree	1e941838af42ddb452272ada777b2c91c746b203 /Lib/html
parent	081bb73a0a87ab32da6b757157a63a7907147ace (diff)
download	cpython-dfa5ae03e1c6f006427d421bda6b01d346ac0e99.tar.gz