1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
|
# XSLT
from lxml.includes cimport xslt
cdef class XSLTError(LxmlError):
"""Base class of all XSLT errors.
"""
cdef class XSLTParseError(XSLTError):
"""Error parsing a stylesheet document.
"""
cdef class XSLTApplyError(XSLTError):
"""Error running an XSL transformation.
"""
class XSLTSaveError(XSLTError, SerialisationError):
"""Error serialising an XSLT result.
"""
cdef class XSLTExtensionError(XSLTError):
"""Error registering an XSLT extension.
"""
# version information
LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION)
LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion)
################################################################################
# Where do we store what?
#
# xsltStylesheet->doc->_private
# == _XSLTResolverContext for XSL stylesheet
#
# xsltTransformContext->_private
# == _XSLTResolverContext for transformed document
#
################################################################################
################################################################################
# XSLT document loaders
@cython.final
@cython.internal
cdef class _XSLTResolverContext(_ResolverContext):
cdef xmlDoc* _c_style_doc
cdef _BaseParser _parser
cdef _XSLTResolverContext _copy(self):
cdef _XSLTResolverContext context
context = _XSLTResolverContext()
_initXSLTResolverContext(context, self._parser)
context._c_style_doc = self._c_style_doc
return context
cdef _initXSLTResolverContext(_XSLTResolverContext context,
_BaseParser parser):
_initResolverContext(context, parser.resolvers)
context._parser = parser
context._c_style_doc = NULL
cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context,
int parse_options, int* error) with gil:
# call the Python document loaders
cdef _XSLTResolverContext context
cdef _ResolverRegistry resolvers
cdef _InputDocument doc_ref
cdef xmlDoc* c_doc
cdef xmlDoc* c_return_doc = NULL
error[0] = 0
context = <_XSLTResolverContext>c_context
# shortcut if we resolve the stylesheet itself
c_doc = context._c_style_doc
try:
if c_doc is not NULL and c_doc.URL is not NULL:
if tree.xmlStrcmp(c_uri, c_doc.URL) == 0:
c_return_doc = _copyDoc(c_doc, 1)
return c_return_doc # 'goto', see 'finally' below
# delegate to the Python resolvers
resolvers = context._resolvers
if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0:
c_uri += 26
uri = _decodeFilename(c_uri)
doc_ref = resolvers.resolve(uri, None, context)
if doc_ref is not None:
if doc_ref._type == PARSER_DATA_STRING:
c_return_doc = _parseDoc(
doc_ref._data_bytes, doc_ref._filename, context._parser)
elif doc_ref._type == PARSER_DATA_FILENAME:
c_return_doc = _parseDocFromFile(
doc_ref._filename, context._parser)
elif doc_ref._type == PARSER_DATA_FILE:
c_return_doc = _parseDocFromFilelike(
doc_ref._file, doc_ref._filename, context._parser)
elif doc_ref._type == PARSER_DATA_EMPTY:
c_return_doc = _newXMLDoc()
if c_return_doc is not NULL and c_return_doc.URL is NULL:
c_return_doc.URL = tree.xmlStrdup(c_uri)
except:
error[0] = 1
context._store_raised()
finally:
return c_return_doc # and swallow any further exceptions
cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context,
xslt.xsltLoadType c_type) with gil:
try:
message = u"Cannot resolve URI %s" % _decodeFilename(c_uri)
if c_type == xslt.XSLT_LOAD_DOCUMENT:
exception = XSLTApplyError(message)
else:
exception = XSLTParseError(message)
(<_XSLTResolverContext>context)._store_exception(exception)
except BaseException as e:
(<_XSLTResolverContext>context)._store_exception(e)
cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict,
int parse_options, void* c_ctxt,
xslt.xsltLoadType c_type) nogil:
# nogil => no Python objects here, may be called without thread context !
cdef xmlDoc* c_doc
cdef xmlDoc* result
cdef void* c_pcontext
cdef int error = 0
# find resolver contexts of stylesheet and transformed doc
if c_type == xslt.XSLT_LOAD_DOCUMENT:
# transformation time
c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private
elif c_type == xslt.XSLT_LOAD_STYLESHEET:
# include/import resolution while parsing
c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private
else:
c_pcontext = NULL
if c_pcontext is NULL:
# can't call Python without context, fall back to default loader
return XSLT_DOC_DEFAULT_LOADER(
c_uri, c_dict, parse_options, c_ctxt, c_type)
c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error)
if c_doc is NULL and not error:
c_doc = XSLT_DOC_DEFAULT_LOADER(
c_uri, c_dict, parse_options, c_ctxt, c_type)
if c_doc is NULL:
_xslt_store_resolver_exception(c_uri, c_pcontext, c_type)
if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET:
c_doc._private = c_pcontext
return c_doc
cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader
xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader)
################################################################################
# XSLT file/network access control
cdef class XSLTAccessControl:
u"""XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True)
Access control for XSLT: reading/writing files, directories and
network I/O. Access to a type of resource is granted or denied by
passing any of the following boolean keyword arguments. All of
them default to True to allow access.
- read_file
- write_file
- create_dir
- read_network
- write_network
For convenience, there is also a class member `DENY_ALL` that
provides an XSLTAccessControl instance that is readily configured
to deny everything, and a `DENY_WRITE` member that denies all
write access but allows read access.
See `XSLT`.
"""
cdef xslt.xsltSecurityPrefs* _prefs
def __cinit__(self):
self._prefs = xslt.xsltNewSecurityPrefs()
if self._prefs is NULL:
raise MemoryError()
def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True,
bint read_network=True, bint write_network=True):
self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file)
self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file)
self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir)
self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network)
self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network)
DENY_ALL = XSLTAccessControl(
read_file=False, write_file=False, create_dir=False,
read_network=False, write_network=False)
DENY_WRITE = XSLTAccessControl(
read_file=True, write_file=False, create_dir=False,
read_network=True, write_network=False)
def __dealloc__(self):
if self._prefs is not NULL:
xslt.xsltFreeSecurityPrefs(self._prefs)
@cython.final
cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow):
cdef xslt.xsltSecurityCheck function
if allow:
function = xslt.xsltSecurityAllow
else:
function = xslt.xsltSecurityForbid
xslt.xsltSetSecurityPrefs(self._prefs, option, function)
@cython.final
cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt):
xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt)
property options:
u"The access control configuration as a map of options."
def __get__(self):
return {
u'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE),
u'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE),
u'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY),
u'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK),
u'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK),
}
@cython.final
cdef _optval(self, xslt.xsltSecurityOption option):
cdef xslt.xsltSecurityCheck function
function = xslt.xsltGetSecurityPrefs(self._prefs, option)
if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow:
return True
elif function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid:
return False
else:
return None
def __repr__(self):
items = sorted(self.options.items())
return u"%s(%s)" % (
python._fqtypename(self).decode('UTF-8').split(u'.')[-1],
u', '.join([u"%s=%r" % item for item in items]))
################################################################################
# XSLT
cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf):
if ns_utf is None:
return 0
# libxml2 internalises the strings if ctxt has a dict
return xslt.xsltRegisterExtFunction(
<xslt.xsltTransformContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf),
<xslt.xmlXPathFunction>_xpath_function_call)
cdef dict EMPTY_DICT = {}
@cython.final
@cython.internal
cdef class _XSLTContext(_BaseContext):
cdef xslt.xsltTransformContext* _xsltCtxt
cdef _ReadOnlyElementProxy _extension_element_proxy
cdef dict _extension_elements
def __cinit__(self):
self._xsltCtxt = NULL
self._extension_elements = EMPTY_DICT
def __init__(self, namespaces, extensions, error_log, enable_regexp,
build_smart_strings):
if extensions is not None and extensions:
for ns_name_tuple, extension in extensions.items():
if ns_name_tuple[0] is None:
raise XSLTExtensionError, \
u"extensions must not have empty namespaces"
if isinstance(extension, XSLTExtension):
if self._extension_elements is EMPTY_DICT:
self._extension_elements = {}
extensions = extensions.copy()
ns_utf = _utf8(ns_name_tuple[0])
name_utf = _utf8(ns_name_tuple[1])
self._extension_elements[(ns_utf, name_utf)] = extension
del extensions[ns_name_tuple]
_BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
build_smart_strings)
cdef _BaseContext _copy(self):
cdef _XSLTContext context
context = <_XSLTContext>_BaseContext._copy(self)
context._extension_elements = self._extension_elements
return context
cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
_Document doc):
self._xsltCtxt = xsltCtxt
self._set_xpath_context(xsltCtxt.xpathCtxt)
self._register_context(doc)
self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
_registerXSLTExtensions(xsltCtxt, self._extension_elements)
cdef free_context(self):
self._cleanup_context()
self._release_context()
if self._xsltCtxt is not NULL:
xslt.xsltFreeTransformContext(self._xsltCtxt)
self._xsltCtxt = NULL
self._release_temp_refs()
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _XSLTQuotedStringParam:
u"""A wrapper class for literal XSLT string parameters that require
quote escaping.
"""
cdef bytes strval
def __cinit__(self, strval):
self.strval = _utf8(strval)
@cython.no_gc_clear
cdef class XSLT:
u"""XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None)
Turn an XSL document into an XSLT object.
Calling this object on a tree or Element will execute the XSLT::
transform = etree.XSLT(xsl_tree)
result = transform(xml_tree)
Keyword arguments of the constructor:
- extensions: a dict mapping ``(namespace, name)`` pairs to
extension functions or extension elements
- regexp: enable exslt regular expression support in XPath
(default: True)
- access_control: access restrictions for network or file
system (see `XSLTAccessControl`)
Keyword arguments of the XSLT call:
- profile_run: enable XSLT profiling (default: False)
Other keyword arguments of the call are passed to the stylesheet
as parameters.
"""
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
cdef _XSLTResolverContext _xslt_resolver_context
cdef XSLTAccessControl _access_control
cdef _ErrorLog _error_log
def __cinit__(self):
self._c_style = NULL
def __init__(self, xslt_input, *, extensions=None, regexp=True,
access_control=None):
cdef xslt.xsltStylesheet* c_style = NULL
cdef xmlDoc* c_doc
cdef _Document doc
cdef _Element root_node
doc = _documentOrRaise(xslt_input)
root_node = _rootNodeOrRaise(xslt_input)
# set access control or raise TypeError
self._access_control = access_control
# make a copy of the document as stylesheet parsing modifies it
c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
# make sure we always have a stylesheet URL
if c_doc.URL is NULL:
doc_url_utf = python.PyUnicode_AsASCIIString(
u"string://__STRING__XSLT__/%d.xslt" % id(self))
c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf))
self._error_log = _ErrorLog()
self._xslt_resolver_context = _XSLTResolverContext()
_initXSLTResolverContext(self._xslt_resolver_context, doc._parser)
# keep a copy in case we need to access the stylesheet via 'document()'
self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1)
c_doc._private = <python.PyObject*>self._xslt_resolver_context
with self._error_log:
c_style = xslt.xsltParseStylesheetDoc(c_doc)
if c_style is NULL or c_style.errors:
tree.xmlFreeDoc(c_doc)
if c_style is not NULL:
xslt.xsltFreeStylesheet(c_style)
self._xslt_resolver_context._raise_if_stored()
# last error seems to be the most accurate here
if self._error_log.last_error is not None and \
self._error_log.last_error.message:
raise XSLTParseError(self._error_log.last_error.message,
self._error_log)
else:
raise XSLTParseError(
self._error_log._buildExceptionMessage(
u"Cannot parse stylesheet"),
self._error_log)
c_doc._private = NULL # no longer used!
self._c_style = c_style
self._context = _XSLTContext(None, extensions, self._error_log, regexp, True)
def __dealloc__(self):
if self._xslt_resolver_context is not None and \
self._xslt_resolver_context._c_style_doc is not NULL:
tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
# this cleans up the doc copy as well
if self._c_style is not NULL:
xslt.xsltFreeStylesheet(self._c_style)
property error_log:
u"The log of errors and warnings of an XSLT execution."
def __get__(self):
return self._error_log.copy()
@staticmethod
def strparam(strval):
u"""strparam(strval)
Mark an XSLT string parameter that requires quote escaping
before passing it into the transformation. Use it like this::
result = transform(doc, some_strval = XSLT.strparam(
'''it's \"Monty Python's\" ...'''))
Escaped string parameters can be reused without restriction.
"""
return _XSLTQuotedStringParam(strval)
@staticmethod
def set_global_max_depth(int max_depth):
u"""set_global_max_depth(max_depth)
The maximum traversal depth that the stylesheet engine will allow.
This does not only count the template recursion depth but also takes
the number of variables/parameters into account. The required setting
for a run depends on both the stylesheet and the input data.
Example::
XSLT.set_global_max_depth(5000)
Note that this is currently a global, module-wide setting because
libxslt does not support it at a per-stylesheet level.
"""
if max_depth < 0:
raise ValueError("cannot set a maximum stylesheet traversal depth < 0")
xslt.xsltMaxDepth = max_depth
def apply(self, _input, *, profile_run=False, **kw):
u"""apply(self, _input, profile_run=False, **kw)
:deprecated: call the object, not this method."""
return self(_input, profile_run=profile_run, **kw)
def tostring(self, _ElementTree result_tree):
u"""tostring(self, result_tree)
Save result doc to string based on stylesheet output method.
:deprecated: use str(result_tree) instead.
"""
return str(result_tree)
def __deepcopy__(self, memo):
return self.__copy__()
def __copy__(self):
return _copyXSLT(self)
def __call__(self, _input, *, profile_run=False, **kw):
u"""__call__(self, _input, profile_run=False, **kw)
Execute the XSL transformation on a tree or Element.
Pass the ``profile_run`` option to get profile information
about the XSLT. The result of the XSLT will have a property
xslt_profile that holds an XML tree with profiling data.
"""
cdef _XSLTContext context = None
cdef _XSLTResolverContext resolver_context
cdef _Document input_doc
cdef _Element root_node
cdef _Document result_doc
cdef _Document profile_doc = None
cdef xmlDoc* c_profile_doc
cdef xslt.xsltTransformContext* transform_ctxt
cdef xmlDoc* c_result = NULL
cdef xmlDoc* c_doc
cdef tree.xmlDict* c_dict
cdef const_char** params = NULL
assert self._c_style is not NULL, "XSLT stylesheet not initialised"
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
if transform_ctxt is NULL:
_destroyFakeDoc(input_doc._c_doc, c_doc)
raise MemoryError()
# using the stylesheet dict is safer than using a possibly
# unrelated dict from the current thread. Almost all
# non-input tag/attr names will come from the stylesheet
# anyway.
if transform_ctxt.dict is not NULL:
xmlparser.xmlDictFree(transform_ctxt.dict)
if kw:
# parameter values are stored in the dict
# => avoid unnecessarily cluttering the global dict
transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict)
if transform_ctxt.dict is NULL:
xslt.xsltFreeTransformContext(transform_ctxt)
raise MemoryError()
else:
transform_ctxt.dict = self._c_style.doc.dict
xmlparser.xmlDictReference(transform_ctxt.dict)
xslt.xsltSetCtxtParseOptions(
transform_ctxt, input_doc._parser._parse_options)
if profile_run:
transform_ctxt.profile = 1
try:
context = self._context._copy()
context.register_context(transform_ctxt, input_doc)
resolver_context = self._xslt_resolver_context._copy()
transform_ctxt._private = <python.PyObject*>resolver_context
_convert_xslt_parameters(transform_ctxt, kw, ¶ms)
c_result = self._run_transform(
c_doc, params, context, transform_ctxt)
if params is not NULL:
# deallocate space for parameters
python.lxml_free(params)
if transform_ctxt.state != xslt.XSLT_STATE_OK:
if c_result is not NULL:
tree.xmlFreeDoc(c_result)
c_result = NULL
if transform_ctxt.profile:
c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
if c_profile_doc is not NULL:
profile_doc = _documentFactory(
c_profile_doc, input_doc._parser)
finally:
if context is not None:
context.free_context()
_destroyFakeDoc(input_doc._c_doc, c_doc)
try:
if resolver_context is not None and resolver_context._has_raised():
if c_result is not NULL:
tree.xmlFreeDoc(c_result)
c_result = NULL
resolver_context._raise_if_stored()
if context._exc._has_raised():
if c_result is not NULL:
tree.xmlFreeDoc(c_result)
c_result = NULL
context._exc._raise_if_stored()
if c_result is NULL:
# last error seems to be the most accurate here
error = self._error_log.last_error
if error is not None and error.message:
if error.line > 0:
message = u"%s, line %d" % (error.message, error.line)
else:
message = error.message
elif error is not None and error.line > 0:
message = u"Error applying stylesheet, line %d" % error.line
else:
message = u"Error applying stylesheet"
raise XSLTApplyError(message, self._error_log)
finally:
if resolver_context is not None:
resolver_context.clear()
result_doc = _documentFactory(c_result, input_doc._parser)
c_dict = c_result.dict
xmlparser.xmlDictReference(c_dict)
__GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict)
if c_dict is not c_result.dict or \
self._c_style.doc.dict is not c_result.dict or \
input_doc._c_doc.dict is not c_result.dict:
with nogil:
if c_dict is not c_result.dict:
fixThreadDictNames(<xmlNode*>c_result,
c_dict, c_result.dict)
if self._c_style.doc.dict is not c_result.dict:
fixThreadDictNames(<xmlNode*>c_result,
self._c_style.doc.dict, c_result.dict)
if input_doc._c_doc.dict is not c_result.dict:
fixThreadDictNames(<xmlNode*>c_result,
input_doc._c_doc.dict, c_result.dict)
xmlparser.xmlDictFree(c_dict)
return _xsltResultTreeFactory(result_doc, self, profile_doc)
cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc,
const_char** params, _XSLTContext context,
xslt.xsltTransformContext* transform_ctxt):
cdef xmlDoc* c_result
xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log,
<xmlerror.xmlGenericErrorFunc>_receiveXSLTError)
if self._access_control is not None:
self._access_control._register_in_context(transform_ctxt)
with nogil:
c_result = xslt.xsltApplyStylesheetUser(
self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
return c_result
cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
dict parameters, const_char*** params_ptr):
cdef Py_ssize_t i, parameter_count
cdef const_char** params
cdef tree.xmlDict* c_dict = transform_ctxt.dict
params_ptr[0] = NULL
parameter_count = len(parameters)
if parameter_count == 0:
return
# allocate space for parameters
# * 2 as we want an entry for both key and value,
# and + 1 as array is NULL terminated
params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*))
if not params:
raise MemoryError()
try:
i = 0
for key, value in parameters.iteritems():
k = _utf8(key)
if isinstance(value, _XSLTQuotedStringParam):
v = (<_XSLTQuotedStringParam>value).strval
xslt.xsltQuoteOneUserParam(
transform_ctxt, _xcstr(k), _xcstr(v))
else:
if isinstance(value, XPath):
v = (<XPath>value)._path
else:
v = _utf8(value)
params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(k), len(k))
i += 1
params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(v), len(v))
i += 1
except:
python.lxml_free(params)
raise
params[i] = NULL
params_ptr[0] = params
cdef XSLT _copyXSLT(XSLT stylesheet):
cdef XSLT new_xslt
cdef xmlDoc* c_doc
assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
new_xslt = XSLT.__new__(XSLT)
new_xslt._access_control = stylesheet._access_control
new_xslt._error_log = _ErrorLog()
new_xslt._context = stylesheet._context._copy()
new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy()
new_xslt._xslt_resolver_context._c_style_doc = _copyDoc(
stylesheet._xslt_resolver_context._c_style_doc, 1)
c_doc = _copyDoc(stylesheet._c_style.doc, 1)
new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc)
if new_xslt._c_style is NULL:
tree.xmlFreeDoc(c_doc)
raise MemoryError()
return new_xslt
@cython.final
cdef class _XSLTResultTree(_ElementTree):
"""The result of an XSLT evaluation.
Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string,
and the ``.write_output()`` method to write serialise to a file.
"""
cdef XSLT _xslt
cdef _Document _profile
cdef xmlChar* _buffer
cdef Py_ssize_t _buffer_len
cdef Py_ssize_t _buffer_refcnt
def write_output(self, file, *, compression=0):
"""write_output(self, file, *, compression=0)
Serialise the XSLT output to a file or file-like object.
As opposed to the generic ``.write()`` method, ``.write_output()`` serialises
the result as defined by the ``<xsl:output>`` tag.
"""
cdef _FilelikeWriter writer = None
cdef _Document doc
cdef int r, c_compression
cdef const_xmlChar* c_encoding = NULL
cdef tree.xmlOutputBuffer* c_buffer
if self._context_node is not None:
doc = self._context_node._doc
else:
doc = None
if doc is None:
doc = self._doc
if doc is None:
raise XSLTSaveError("No document to serialise")
c_compression = compression or 0
if _isString(file):
file_path = _encodeFilename(file)
c_filename = _cstr(file_path)
with nogil:
r = xslt.xsltSaveResultToFilename(
c_filename, doc._c_doc, self._xslt._c_style, c_compression)
else:
xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
writer = _create_output_buffer(file, <const_char*>c_encoding, compression, &c_buffer, close=False)
if writer is None:
with nogil:
r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
else:
r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
if writer is not None:
writer._exc_context._raise_if_stored()
if r == -1:
python.PyErr_SetFromErrno(XSLTSaveError) # raises
cdef _saveToStringAndSize(self, xmlChar** s, int* l):
cdef _Document doc
cdef int r
if self._context_node is not None:
doc = self._context_node._doc
else:
doc = None
if doc is None:
doc = self._doc
if doc is None:
s[0] = NULL
return
with nogil:
r = xslt.xsltSaveResultToString(s, l, doc._c_doc,
self._xslt._c_style)
if r == -1:
raise MemoryError()
def __str__(self):
cdef xmlChar* s = NULL
cdef int l = 0
if not python.IS_PYTHON2:
return self.__unicode__()
self._saveToStringAndSize(&s, &l)
if s is NULL:
return ''
# we must not use 'funicode()' here as this is not always UTF-8
try:
result = <bytes>s[:l]
finally:
tree.xmlFree(s)
return result
def __unicode__(self):
cdef xmlChar* encoding
cdef xmlChar* s = NULL
cdef int l = 0
self._saveToStringAndSize(&s, &l)
if s is NULL:
return u''
encoding = self._xslt._c_style.encoding
try:
if encoding is NULL:
result = s[:l].decode('UTF-8')
else:
result = s[:l].decode(encoding)
finally:
tree.xmlFree(s)
return _stripEncodingDeclaration(result)
def __getbuffer__(self, Py_buffer* buffer, int flags):
cdef int l = 0
if buffer is NULL:
return
if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l)
buffer.len = l
if self._buffer is NULL and not flags & python.PyBUF_WRITABLE:
self._buffer = <xmlChar*>buffer.buf
self._buffer_len = l
self._buffer_refcnt = 1
else:
buffer.buf = self._buffer
buffer.len = self._buffer_len
self._buffer_refcnt += 1
if flags & python.PyBUF_WRITABLE:
buffer.readonly = 0
else:
buffer.readonly = 1
if flags & python.PyBUF_FORMAT:
buffer.format = "B"
else:
buffer.format = NULL
buffer.ndim = 0
buffer.shape = NULL
buffer.strides = NULL
buffer.suboffsets = NULL
buffer.itemsize = 1
buffer.internal = NULL
if buffer.obj is not self: # set by Cython?
buffer.obj = self
def __releasebuffer__(self, Py_buffer* buffer):
if buffer is NULL:
return
if <xmlChar*>buffer.buf is self._buffer:
self._buffer_refcnt -= 1
if self._buffer_refcnt == 0:
tree.xmlFree(<char*>self._buffer)
self._buffer = NULL
else:
tree.xmlFree(<char*>buffer.buf)
buffer.buf = NULL
property xslt_profile:
u"""Return an ElementTree with profiling data for the stylesheet run.
"""
def __get__(self):
cdef object root
if self._profile is None:
return None
root = self._profile.getroot()
if root is None:
return None
return ElementTree(root)
def __del__(self):
self._profile = None
cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
cdef _XSLTResultTree result
result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
result._xslt = xslt
result._profile = profile
return result
# functions like "output" and "write" are a potential security risk, but we
# rely on the user to configure XSLTAccessControl as needed
xslt.xsltRegisterAllExtras()
# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
################################################################################
# XSLT PI support
cdef object _RE_PI_HREF = re.compile(ur'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub
cdef XPath __findStylesheetByID = None
cdef _findStylesheetByID(_Document doc, id):
global __findStylesheetByID
if __findStylesheetByID is None:
__findStylesheetByID = XPath(
u"//xsl:stylesheet[@xml:id = $id]",
namespaces={u"xsl" : u"http://www.w3.org/1999/XSL/Transform"})
return __findStylesheetByID(doc, id=id)
cdef class _XSLTProcessingInstruction(PIBase):
def parseXSL(self, parser=None):
u"""parseXSL(self, parser=None)
Try to parse the stylesheet referenced by this PI and return
an ElementTree for it. If the stylesheet is embedded in the
same document (referenced via xml:id), find and return an
ElementTree for the stylesheet Element.
The optional ``parser`` keyword argument can be passed to specify the
parser used to read from external stylesheet URLs.
"""
cdef _Document result_doc
cdef _Element result_node
cdef bytes href_utf
cdef const_xmlChar* c_href
cdef xmlAttr* c_attr
_assertValidNode(self)
if self._c_node.content is NULL:
raise ValueError, u"PI lacks content"
hrefs = _FIND_PI_HREF(u' ' + (<unsigned char*>self._c_node.content).decode('UTF-8'))
if len(hrefs) != 1:
raise ValueError, u"malformed PI attributes"
hrefs = hrefs[0]
href_utf = utf8(hrefs[0] or hrefs[1])
c_href = _xcstr(href_utf)
if c_href[0] != c'#':
# normal URL, try to parse from it
c_href = tree.xmlBuildURI(
c_href,
tree.xmlNodeGetBase(self._c_node.doc, self._c_node))
if c_href is not NULL:
try:
href_utf = <unsigned char*>c_href
finally:
tree.xmlFree(<char*>c_href)
result_doc = _parseDocumentFromURL(href_utf, parser)
return _elementTreeFactory(result_doc, None)
# ID reference to embedded stylesheet
# try XML:ID lookup
_assertValidDoc(self._doc)
c_href += 1 # skip leading '#'
c_attr = tree.xmlGetID(self._c_node.doc, c_href)
if c_attr is not NULL and c_attr.doc is self._c_node.doc:
result_node = _elementFactory(self._doc, c_attr.parent)
return _elementTreeFactory(result_node._doc, result_node)
# try XPath search
root = _findStylesheetByID(self._doc, funicode(c_href))
if not root:
raise ValueError, u"reference to non-existing embedded stylesheet"
elif len(root) > 1:
raise ValueError, u"ambiguous reference to embedded stylesheet"
result_node = root[0]
return _elementTreeFactory(result_node._doc, result_node)
def set(self, key, value):
u"""set(self, key, value)
Supports setting the 'href' pseudo-attribute in the text of
the processing instruction.
"""
if key != u"href":
raise AttributeError, \
u"only setting the 'href' attribute is supported on XSLT-PIs"
if value is None:
attrib = u""
elif u'"' in value or u'>' in value:
raise ValueError, u"Invalid URL, must not contain '\"' or '>'"
else:
attrib = u' href="%s"' % value
text = u' ' + self.text
if _FIND_PI_HREF(text):
self.text = _REPLACE_PI_HREF(attrib, text)
else:
self.text = text + attrib
|