summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- CHANGES.txt 2
-rw-r--r-- src/lxml/parser.pxi 14
-rw-r--r-- src/lxml/tests/test_etree.py 21
3 files changed, 30 insertions, 7 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index e3e9c05e..cbc9cb32 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -17,6 +17,8 @@ Features added
Bugs fixed
----------
+* Feed parser failed to honour the 'recover' option on parse errors.
+
* Target parser didn't call ``.close()`` on the target object if
parsing failed. Now it is guaranteed that ``.close()`` will be
called after parsing, regardless of the outcome.
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index b88ff708..7e6a7656 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -1042,6 +1042,7 @@ cdef class _FeedParser(_BaseParser):
cdef char* c_encoding
cdef int buffer_len
cdef int error
+ cdef bint recover = self._parse_options & xmlparser.XML_PARSE_RECOVER
if python.PyString_Check(data):
if self._default_encoding is None:
c_encoding = NULL
@@ -1078,10 +1079,10 @@ cdef class _FeedParser(_BaseParser):
xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options)
error = xmlparser.xmlCtxtResetPush(
pctxt, c_data, buffer_len, NULL, c_encoding)
- py_buffer_len = py_buffer_len - buffer_len
- c_data = c_data + buffer_len
+ py_buffer_len -= buffer_len
+ c_data += buffer_len
- while error == 0 and py_buffer_len > 0:
+ while (recover or error == 0) and py_buffer_len > 0:
if py_buffer_len > python.INT_MAX:
buffer_len = python.INT_MAX
else:
@@ -1090,11 +1091,10 @@ cdef class _FeedParser(_BaseParser):
error = htmlparser.htmlParseChunk(pctxt, c_data, buffer_len, 0)
else:
error = xmlparser.xmlParseChunk(pctxt, c_data, buffer_len, 0)
- py_buffer_len = py_buffer_len - buffer_len
- c_data = c_data + buffer_len
+ py_buffer_len -= buffer_len
+ c_data += buffer_len
- if error or (not pctxt.wellFormed and
- not self._parse_options & xmlparser.XML_PARSE_RECOVER):
+ if not recover and (error or not pctxt.wellFormed):
self._feed_parser_running = 0
try:
context._handleParseResult(self, NULL, None)
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 9cf72776..59a6489d 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -580,6 +580,27 @@ class ETreeOnlyTestCase(HelperTestCase):
self.etree.XMLParser(encoding="utf-8")
self.etree.XMLParser(encoding="iso-8859-1")
+ def test_feed_parser_recover(self):
+ parser = self.etree.XMLParser(recover=True)
+
+ parser.feed('<?xml version=')
+ parser.feed('"1.0"?><ro')
+ parser.feed('ot><')
+ parser.feed('a test="works"')
+ parser.feed('><othertag/></root') # <a> not closed!
+ parser.feed('>')
+
+ root = parser.close()
+
+ self.assertEquals(root.tag, "root")
+ self.assertEquals(len(root), 1)
+ self.assertEquals(root[0].tag, "a")
+ self.assertEquals(root[0].get("test"), "works")
+ self.assertEquals(len(root[0]), 1)
+ self.assertEquals(root[0][0].tag, "othertag")
+ # FIXME: would be nice to get some errors logged ...
+ #self.assert_(len(parser.error_log) > 0, "error log is empty")
+
def test_elementtree_parser_target_type_error(self):
assertEquals = self.assertEquals
assertFalse = self.assertFalse