diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2018-03-10 13:18:30 +0100 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2018-03-10 13:18:30 +0100 |
commit | d6fdeee94084e172bb81cf92119a081362f0dbcf (patch) | |
tree | ff7076ad4d7fdd8ac08f734d1b9e16435545e659 | |
parent | d93dafab78150bd611ba9e30706829742234bbef (diff) | |
download | python-lxml-d6fdeee94084e172bb81cf92119a081362f0dbcf.tar.gz |
LP#1551797: Send XSLT errors to a thread-local stack of error logs and push/pop the transform's log on it around XSLT operations.
This should solve the problem of XSLT error messages getting lost and also work around the fact that the XSLT error function in libxslt is a global variable that is not thread-specfic.
-rw-r--r-- | CHANGES.txt | 2 | ||||
-rw-r--r-- | src/lxml/tests/test_threading.py | 74 | ||||
-rw-r--r-- | src/lxml/tests/test_xslt.py | 35 | ||||
-rw-r--r-- | src/lxml/xmlerror.pxi | 86 |
4 files changed, 168 insertions, 29 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 9c7c894a..128804be 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -21,6 +21,8 @@ Features added Bugs fixed ---------- +* LP#1551797: Some XSLT messages were not captured by the transform error log. + * LP#1737825: Crash at shutdown after an interrupted iterparse run with XMLSchema validation. diff --git a/src/lxml/tests/test_threading.py b/src/lxml/tests/test_threading.py index 153b6b55..8948c3ec 100644 --- a/src/lxml/tests/test_threading.py +++ b/src/lxml/tests/test_threading.py @@ -123,6 +123,80 @@ class ThreadingTestCase(HelperTestCase): self.assertEqual(_bytes('<a><b>B</b><c>C</c><foo><a>B</a></foo></a>'), tostring(root)) + def test_thread_xslt_parsing_error_log(self): + style = self.parse('''\ +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + <xsl:template match="tag" /> + <!-- extend time for parsing + transform --> +''' + '\n'.join('<xsl:template match="tag%x" />' % i for i in range(200)) + ''' + <xsl:foo /> +</xsl:stylesheet>''') + self.assertRaises(etree.XSLTParseError, + etree.XSLT, style) + + error_logs = [] + + def run_thread(): + try: + etree.XSLT(style) + except etree.XSLTParseError as e: + error_logs.append(e.error_log) + else: + self.assertFalse(True, "XSLT parsing should have failed but didn't") + + self._run_threads(16, run_thread) + + self.assertEqual(16, len(error_logs)) + last_log = None + for log in error_logs: + self.assertTrue(len(log)) + if last_log is not None: + self.assertEqual(len(last_log), len(log)) + self.assertEqual(4, len(log)) + for error in log: + self.assertTrue(':ERROR:XSLT:' in str(error)) + last_log = log + + def test_thread_xslt_apply_error_log(self): + tree = self.parse('<tagFF/>') + style = self.parse('''\ +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + <xsl:template name="tag0"> + <xsl:message terminate="yes">FAIL</xsl:message> + </xsl:template> + <!-- extend time for parsing + transform --> +''' + '\n'.join('<xsl:template match="tag%X" name="tag%x"> <xsl:call-template name="tag%x" /> </xsl:template>' % (i, i, i-1) + for i in range(1, 256)) + ''' +</xsl:stylesheet>''') + self.assertRaises(etree.XSLTApplyError, + etree.XSLT(style), tree) + + error_logs = [] + + def run_thread(): + transform = etree.XSLT(style) + try: + transform(tree) + except etree.XSLTApplyError: + error_logs.append(transform.error_log) + else: + self.assertFalse(True, "XSLT parsing should have failed but didn't") + + self._run_threads(16, run_thread) + + self.assertEqual(16, len(error_logs)) + last_log = None + for log in error_logs: + self.assertTrue(len(log)) + if last_log is not None: + self.assertEqual(len(last_log), len(log)) + self.assertEqual(1, len(log)) + for error in log: + self.assertTrue(':ERROR:XSLT:' in str(error)) + last_log = log + def test_thread_xslt_attr_replace(self): # this is the only case in XSLT where the result tree can be # modified in-place diff --git a/src/lxml/tests/test_xslt.py b/src/lxml/tests/test_xslt.py index 5543194c..96eb83ee 100644 --- a/src/lxml/tests/test_xslt.py +++ b/src/lxml/tests/test_xslt.py @@ -264,7 +264,7 @@ class ETreeXSLTTestCase(HelperTestCase): self.assertRaises(etree.XSLTParseError, etree.XSLT, style) - def _test_xslt_error_log(self): + def test_xslt_parsing_error_log(self): tree = self.parse('<a/>') style = self.parse('''\ <xsl:stylesheet version="1.0" @@ -275,7 +275,7 @@ class ETreeXSLTTestCase(HelperTestCase): etree.XSLT, style) exc = None try: - etree.XSLT(tree) + etree.XSLT(style) except etree.XSLTParseError as e: exc = e else: @@ -285,6 +285,37 @@ class ETreeXSLTTestCase(HelperTestCase): for error in exc.error_log: self.assertTrue(':ERROR:XSLT:' in str(error)) + def test_xslt_apply_error_log(self): + tree = self.parse('<a/>') + style = self.parse('''\ +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + <xsl:template match="a"> + <xsl:copy> + <xsl:message terminate="yes">FAIL</xsl:message> + </xsl:copy> + </xsl:template> +</xsl:stylesheet>''') + self.assertRaises(etree.XSLTApplyError, + etree.XSLT(style), tree) + + transform = etree.XSLT(style) + exc = None + try: + transform(tree) + except etree.XSLTApplyError as e: + exc = e + else: + self.assertFalse(True, "XSLT processing should have failed but didn't") + + self.assertTrue(exc is not None) + self.assertTrue(len(exc.error_log)) + self.assertEqual(len(transform.error_log), len(exc.error_log)) + for error in exc.error_log: + self.assertTrue(':ERROR:XSLT:' in str(error)) + for error in transform.error_log: + self.assertTrue(':ERROR:XSLT:' in str(error)) + def test_xslt_parameters(self): tree = self.parse('<a><b>B</b><c>C</c></a>') style = self.parse('''\ diff --git a/src/lxml/xmlerror.pxi b/src/lxml/xmlerror.pxi index a57ff975..3a7cacc8 100644 --- a/src/lxml/xmlerror.pxi +++ b/src/lxml/xmlerror.pxi @@ -3,6 +3,9 @@ from lxml.includes cimport xmlerror from lxml cimport cvarargs +DEF GLOBAL_ERROR_LOG = u"_GlobalErrorLog" +DEF XSLT_ERROR_LOG = u"_XSLTErrorLog" + # module level API functions def clear_error_log(): @@ -15,20 +18,18 @@ def clear_error_log(): and this function will only clear the global error log of the current thread. """ - _getGlobalErrorLog().clear() + _getThreadErrorLog(GLOBAL_ERROR_LOG).clear() # setup for global log: cdef void _initThreadLogging(): - # disable generic error lines from libxml2 + # Disable generic error lines from libxml2. _connectGenericErrorLog(None) - # divert error messages to the global error log - connectErrorLog(NULL) + # Divert XSLT error messages to the global XSLT error log instead of stderr. + xslt.xsltSetGenericErrorFunc(NULL, <xmlerror.xmlGenericErrorFunc>_receiveXSLTError) -cdef void connectErrorLog(void* log): - xslt.xsltSetGenericErrorFunc(log, <xmlerror.xmlGenericErrorFunc>_receiveXSLTError) # Logging classes @@ -201,7 +202,7 @@ cdef class _BaseErrorLog: entry._setError(error) is_error = error.level == xmlerror.XML_ERR_ERROR or \ error.level == xmlerror.XML_ERR_FATAL - global_log = _getGlobalErrorLog() + global_log = _getThreadErrorLog(GLOBAL_ERROR_LOG) if global_log is not self: global_log.receive(entry) if is_error: @@ -220,7 +221,7 @@ cdef class _BaseErrorLog: entry._setGeneric(domain, type, level, line, message, filename) is_error = level == xmlerror.XML_ERR_ERROR or \ level == xmlerror.XML_ERR_FATAL - global_log = _getGlobalErrorLog() + global_log = _getThreadErrorLog(GLOBAL_ERROR_LOG) if global_log is not self: global_log.receive(entry) if is_error: @@ -381,6 +382,7 @@ cdef class _ListErrorLog(_BaseErrorLog): """ return self.filter_from_level(ErrorLevels.WARNING) + @cython.final @cython.internal cdef class _ErrorLogContext: @@ -391,6 +393,34 @@ cdef class _ErrorLogContext: """ cdef xmlerror.xmlStructuredErrorFunc old_error_func cdef void* old_error_context + cdef xmlerror.xmlGenericErrorFunc old_xslt_error_func + cdef void* old_xslt_error_context + cdef _BaseErrorLog old_xslt_error_log + + cdef int push_error_log(self, _BaseErrorLog log) except -1: + self.old_error_func = xmlerror.xmlStructuredError + self.old_error_context = xmlerror.xmlStructuredErrorContext + xmlerror.xmlSetStructuredErrorFunc( + <void*>log, <xmlerror.xmlStructuredErrorFunc>_receiveError) + + # xslt.xsltSetGenericErrorFunc() is not thread-local => keep error log in TLS + self.old_xslt_error_func = xslt.xsltGenericError + self.old_xslt_error_context = xslt.xsltGenericErrorContext + self.old_xslt_error_log = _getThreadErrorLog(XSLT_ERROR_LOG) + _setThreadErrorLog(XSLT_ERROR_LOG, log) + xslt.xsltSetGenericErrorFunc( + NULL, <xmlerror.xmlGenericErrorFunc>_receiveXSLTError) + return 0 + + cdef int pop_error_log(self) except -1: + xmlerror.xmlSetStructuredErrorFunc( + self.old_error_context, self.old_error_func) + xslt.xsltSetGenericErrorFunc( + self.old_xslt_error_context, self.old_xslt_error_func) + _setThreadErrorLog(XSLT_ERROR_LOG, self.old_xslt_error_log) + self.old_xslt_error_log= None + return 0 + cdef class _ErrorLog(_ListErrorLog): cdef list _logContexts @@ -414,18 +444,14 @@ cdef class _ErrorLog(_ListErrorLog): del self._entries[:] cdef _ErrorLogContext context = _ErrorLogContext.__new__(_ErrorLogContext) - context.old_error_func = xmlerror.xmlStructuredError - context.old_error_context = xmlerror.xmlStructuredErrorContext + context.push_error_log(self) self._logContexts.append(context) - xmlerror.xmlSetStructuredErrorFunc( - <void*>self, <xmlerror.xmlStructuredErrorFunc>_receiveError) return 0 @cython.final cdef int disconnect(self) except -1: cdef _ErrorLogContext context = self._logContexts.pop() - xmlerror.xmlSetStructuredErrorFunc( - context.old_error_context, context.old_error_func) + context.pop_error_log() return 0 cpdef clear(self): @@ -553,35 +579,39 @@ cdef class PyErrorLog(_BaseErrorLog): # thread-local, global list log to collect error output messages from # libxml2/libxslt -cdef _BaseErrorLog __GLOBAL_ERROR_LOG -__GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) +cdef _BaseErrorLog __GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) + -cdef _BaseErrorLog _getGlobalErrorLog(): - u"""Retrieve the global error log of this thread.""" +cdef _BaseErrorLog _getThreadErrorLog(name): + u"""Retrieve the current error log with name 'name' of this thread.""" cdef python.PyObject* thread_dict thread_dict = python.PyThreadState_GetDict() if thread_dict is NULL: return __GLOBAL_ERROR_LOG try: - return (<object>thread_dict)[u"_GlobalErrorLog"] + return (<object>thread_dict)[name] except KeyError: - log = (<object>thread_dict)[u"_GlobalErrorLog"] = \ + log = (<object>thread_dict)[name] = \ _RotatingErrorLog(__MAX_LOG_SIZE) return log -cdef _setGlobalErrorLog(_BaseErrorLog log): + +cdef _setThreadErrorLog(name, _BaseErrorLog log): u"""Set the global error log of this thread.""" cdef python.PyObject* thread_dict thread_dict = python.PyThreadState_GetDict() if thread_dict is NULL: - global __GLOBAL_ERROR_LOG - __GLOBAL_ERROR_LOG = log + if name == GLOBAL_ERROR_LOG: + global __GLOBAL_ERROR_LOG + __GLOBAL_ERROR_LOG = log else: - (<object>thread_dict)[u"_GlobalErrorLog"] = log + (<object>thread_dict)[name] = log + cdef __copyGlobalErrorLog(): u"Helper function for properties in exceptions." - return _getGlobalErrorLog().copy() + return _getThreadErrorLog(GLOBAL_ERROR_LOG).copy() + def use_global_python_log(PyErrorLog log not None): u"""use_global_python_log(log) @@ -596,7 +626,7 @@ def use_global_python_log(PyErrorLog log not None): Since lxml 2.2, the global error log is local to a thread and this function will only set the global error log of the current thread. """ - _setGlobalErrorLog(log) + _setThreadErrorLog(GLOBAL_ERROR_LOG, log) # local log functions: forward error to logger object @@ -604,8 +634,10 @@ cdef void _forwardError(void* c_log_handler, xmlerror.xmlError* error) with gil: cdef _BaseErrorLog log_handler if c_log_handler is not NULL: log_handler = <_BaseErrorLog>c_log_handler + elif error.domain == xmlerror.XML_FROM_XSLT: + log_handler = _getThreadErrorLog(XSLT_ERROR_LOG) else: - log_handler = _getGlobalErrorLog() + log_handler = _getThreadErrorLog(GLOBAL_ERROR_LOG) log_handler._receive(error) |