diff options
author | scoder <none@none> | 2009-08-13 09:10:25 +0200 |
---|---|---|
committer | scoder <none@none> | 2009-08-13 09:10:25 +0200 |
commit | 010780b8e93a929ec2afbbb44ac0a8049415ace5 (patch) | |
tree | 75905d97efc474cf23d38dee4694dce640ccd147 | |
parent | 1fff389a0cf796fd185cbbc4d17176dfb384d2e5 (diff) | |
download | python-lxml-010780b8e93a929ec2afbbb44ac0a8049415ace5.tar.gz |
[svn r4190] r5203@delle: sbehnel | 2009-08-13 09:06:20 +0200
fix recover flag in feed parser
--HG--
branch : trunk
-rw-r--r-- | CHANGES.txt | 2 | ||||
-rw-r--r-- | src/lxml/parser.pxi | 14 | ||||
-rw-r--r-- | src/lxml/tests/test_etree.py | 21 |
3 files changed, 30 insertions, 7 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index e3e9c05e..cbc9cb32 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,6 +17,8 @@ Features added Bugs fixed ---------- +* Feed parser failed to honour the 'recover' option on parse errors. + * Target parser didn't call ``.close()`` on the target object if parsing failed. Now it is guaranteed that ``.close()`` will be called after parsing, regardless of the outcome. diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi index b88ff708..7e6a7656 100644 --- a/src/lxml/parser.pxi +++ b/src/lxml/parser.pxi @@ -1042,6 +1042,7 @@ cdef class _FeedParser(_BaseParser): cdef char* c_encoding cdef int buffer_len cdef int error + cdef bint recover = self._parse_options & xmlparser.XML_PARSE_RECOVER if python.PyString_Check(data): if self._default_encoding is None: c_encoding = NULL @@ -1078,10 +1079,10 @@ cdef class _FeedParser(_BaseParser): xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options) error = xmlparser.xmlCtxtResetPush( pctxt, c_data, buffer_len, NULL, c_encoding) - py_buffer_len = py_buffer_len - buffer_len - c_data = c_data + buffer_len + py_buffer_len -= buffer_len + c_data += buffer_len - while error == 0 and py_buffer_len > 0: + while (recover or error == 0) and py_buffer_len > 0: if py_buffer_len > python.INT_MAX: buffer_len = python.INT_MAX else: @@ -1090,11 +1091,10 @@ cdef class _FeedParser(_BaseParser): error = htmlparser.htmlParseChunk(pctxt, c_data, buffer_len, 0) else: error = xmlparser.xmlParseChunk(pctxt, c_data, buffer_len, 0) - py_buffer_len = py_buffer_len - buffer_len - c_data = c_data + buffer_len + py_buffer_len -= buffer_len + c_data += buffer_len - if error or (not pctxt.wellFormed and - not self._parse_options & xmlparser.XML_PARSE_RECOVER): + if not recover and (error or not pctxt.wellFormed): self._feed_parser_running = 0 try: context._handleParseResult(self, NULL, None) diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index 9cf72776..59a6489d 100644 --- 
a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -580,6 +580,27 @@ class ETreeOnlyTestCase(HelperTestCase): self.etree.XMLParser(encoding="utf-8") self.etree.XMLParser(encoding="iso-8859-1") + def test_feed_parser_recover(self): + parser = self.etree.XMLParser(recover=True) + + parser.feed('<?xml version=') + parser.feed('"1.0"?><ro') + parser.feed('ot><') + parser.feed('a test="works"') + parser.feed('><othertag/></root') # <a> not closed! + parser.feed('>') + + root = parser.close() + + self.assertEquals(root.tag, "root") + self.assertEquals(len(root), 1) + self.assertEquals(root[0].tag, "a") + self.assertEquals(root[0].get("test"), "works") + self.assertEquals(len(root[0]), 1) + self.assertEquals(root[0][0].tag, "othertag") + # FIXME: would be nice to get some errors logged ... + #self.assert_(len(parser.error_log) > 0, "error log is empty") + def test_elementtree_parser_target_type_error(self): assertEquals = self.assertEquals assertFalse = self.assertFalse |