summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.appveyor.yml2
-rw-r--r--.hgignore1
-rw-r--r--.travis.yml77
-rw-r--r--CHANGES.txt35
-rw-r--r--DD.py52
-rw-r--r--INSTALL.txt15
-rw-r--r--MANIFEST.in2
-rw-r--r--Makefile2
-rw-r--r--benchmark/benchbase.py10
-rw-r--r--buildlibxml.py12
-rw-r--r--doc/FAQ.txt4
-rw-r--r--doc/api.txt5
-rw-r--r--doc/build.txt5
-rw-r--r--doc/intro.txt2
-rw-r--r--doc/lxml-source-howto.txt4
-rw-r--r--doc/main.txt2
-rw-r--r--doc/xpathxslt.txt2
-rw-r--r--requirements.txt2
-rw-r--r--setup.py6
-rw-r--r--setupinfo.py8
-rw-r--r--src/lxml/_elementpath.py2
-rw-r--r--src/lxml/apihelpers.pxi57
-rw-r--r--src/lxml/builder.pxd1
-rw-r--r--src/lxml/builder.py2
-rw-r--r--src/lxml/classlookup.pxi4
-rw-r--r--src/lxml/doctestcompare.py13
-rw-r--r--src/lxml/etree.pyx24
-rw-r--r--src/lxml/html/clean.py9
-rw-r--r--src/lxml/html/diff.py5
-rw-r--r--src/lxml/html/tests/test_autolink.py5
-rw-r--r--src/lxml/html/tests/test_basic.py5
-rw-r--r--src/lxml/html/tests/test_clean.py9
-rw-r--r--src/lxml/html/tests/test_diff.py7
-rw-r--r--src/lxml/html/tests/test_feedparser_data.py27
-rw-r--r--src/lxml/html/tests/test_formfill.py5
-rw-r--r--src/lxml/html/tests/test_forms.py5
-rw-r--r--src/lxml/html/tests/test_html5parser.py20
-rw-r--r--src/lxml/html/tests/test_rewritelinks.py5
-rw-r--r--src/lxml/html/tests/test_select.py2
-rw-r--r--src/lxml/html/tests/test_xhtml.py3
-rw-r--r--src/lxml/html/tests/transform_feedparser_data.py1
-rw-r--r--src/lxml/includes/etree_defs.h9
-rw-r--r--src/lxml/isoschematron/__init__.py4
-rw-r--r--src/lxml/isoschematron/resources/rng/iso-schematron.rng107
-rw-r--r--src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl83
-rw-r--r--src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt167
-rw-r--r--src/lxml/objectify.pyx7
-rw-r--r--src/lxml/parser.pxi6
-rw-r--r--src/lxml/python.pxd2
-rw-r--r--src/lxml/sax.pxd16
-rw-r--r--src/lxml/sax.py69
-rw-r--r--src/lxml/serializer.pxi56
-rw-r--r--src/lxml/tests/dummy_http_server.py2
-rw-r--r--src/lxml/tests/selftest.py37
-rw-r--r--src/lxml/tests/selftest2.py4
-rw-r--r--src/lxml/tests/test_doctestcompare.py4
-rw-r--r--src/lxml/tests/test_elementtree.py8
-rw-r--r--src/lxml/tests/test_etree.py79
-rw-r--r--src/lxml/tests/test_external_document.py5
-rw-r--r--src/lxml/tests/test_http_io.py2
-rw-r--r--src/lxml/tests/test_incremental_xmlfile.py10
-rw-r--r--src/lxml/tests/test_io.py17
-rw-r--r--src/lxml/tests/test_isoschematron.py6
-rw-r--r--src/lxml/tests/test_objectify.py6
-rw-r--r--src/lxml/tests/test_pyclasslookup.py2
-rw-r--r--src/lxml/tests/test_sax.py139
-rw-r--r--src/lxml/tests/test_threading.py9
-rw-r--r--src/lxml/xmlid.pxi8
-rw-r--r--src/lxml/xpath.pxi2
-rw-r--r--test.py4
-rwxr-xr-xtools/manylinux/build-wheels.sh2
-rw-r--r--tox.ini2
-rw-r--r--version.txt2
73 files changed, 822 insertions, 513 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index a2b7c48f..05fe5607 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -4,8 +4,6 @@ environment:
matrix:
- python: 27
- python: 27-x64
- - python: 33
- - python: 33-x64
- python: 34
- python: 34-x64
- python: 35
diff --git a/.hgignore b/.hgignore
index 103fb6ed..7a702b22 100644
--- a/.hgignore
+++ b/.hgignore
@@ -17,6 +17,7 @@ src/lxml/objectify.c
src/lxml/lxml.objectify.c
build/
+libs/
dist/
wheelhouse/
wheels/
diff --git a/.travis.yml b/.travis.yml
index ff313b07..07e8d247 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,19 +1,66 @@
-language: python
+os: linux
dist: trusty
sudo: false
+language: python
+
+cache:
+ pip: true
+ directories:
+ - $HOME/.ccache
+ - libs
+
python:
- - 2.6
- 2.7
- - 3.3
- - 3.4
- - 3.5
- 3.6
- - 3.7
- - 3.7-dev
- - 3.8-dev
- - pypy
- - pypy3
+ - 3.5
+ - 3.4
+
+env:
+ global:
+ - USE_CCACHE=1
+ - CCACHE_SLOPPINESS=pch_defines,time_macros
+ - CCACHE_COMPRESS=1
+ - CCACHE_MAXSIZE=70M
+ - PATH="/usr/lib/ccache:$PATH"
+ - LIBXML2_VERSION=2.9.8
+ - LIBXSLT_VERSION=1.1.32
+ matrix:
+ - STATIC_DEPS=true
+ - STATIC_DEPS=false
+
+matrix:
+ include:
+ - python: 3.7
+ dist: xenial # Required for Python 3.7
+ sudo: required # travis-ci/travis-ci#9069
+ env: STATIC_DEPS=true
+ - python: 3.7
+ dist: xenial # Required for Python 3.7
+ sudo: required # travis-ci/travis-ci#9069
+ env: STATIC_DEPS=false
+ - python: 3.8-dev
+ dist: xenial # Required for Python 3.7
+ sudo: required # travis-ci/travis-ci#9069
+ env: STATIC_DEPS=true
+ - python: 3.8-dev
+ dist: xenial # Required for Python 3.7
+ sudo: required # travis-ci/travis-ci#9069
+ env: STATIC_DEPS=false
+ - python: 3.6
+ env:
+ - STATIC_DEPS=true
+ - LIBXML2_VERSION=2.9.2 # minimum version requirements
+ - LIBXSLT_VERSION=1.1.27
+ - python: pypy
+ env: STATIC_DEPS=false
+ - python: pypy3
+ env: STATIC_DEPS=false
+ allow_failures:
+ - python: 3.7 # Currently needed to avoid waiting forever for the build.
+ - python: 3.8-dev
+ - python: pypy
+ - python: pypy3
install:
- pip install -U pip wheel
@@ -22,13 +69,5 @@ install:
script:
- CFLAGS="-O0 -g -fPIC" python -u setup.py build_ext --inplace $(if [ -n "${TRAVIS_PYTHON_VERSION##2.*}" -a -n "${TRAVIS_PYTHON_VERSION##3.[34]*}" ]; then echo -n " -j7 "; fi )
+ - ccache -s || true
- CFLAGS="-O0 -g -fPIC" PYTHONUNBUFFERED=x make test
-
-matrix:
- allow_failures:
- - python: 3.8-dev
- - python: pypy
- - python: pypy3
-
-cache:
- pip: true
diff --git a/CHANGES.txt b/CHANGES.txt
index e8e60265..959f4a38 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,35 @@
lxml changelog
==============
+4.3.0 (2019-??-??)
+
+Features added
+--------------
+
+* The module ``lxml.sax`` is compiled using Cython in order to speed it up.
+
+* GH#267: ``lxml.sax.ElementTreeProducer`` now preserves the namespace prefixes.
+ If two prefixes point to the same URI, the first prefix in alphabetical order
+ is used. Patch by Lennart Regebro.
+
+* Updated ISO-Schematron implementation to 2013 version (now MIT licensed)
+ and the corresponding schema to the 2016 version (with optional "properties").
+
+Bugs fixed
+----------
+
+* LP#1799755: Fix a DeprecationWarning in Py3.7+.
+
+Other changes
+-------------
+
+* GH#270, GH#271: Support for Python 2.6 and 3.3 was removed.
+ Patch by hugovk.
+
+* The minimum dependency versions were raised to libxml2 2.9.2 and libxslt 1.1.27,
+ which were released in 2014 and 2012 respectively.
+
+
4.2.6 (2019-01-02)
==================
@@ -3878,16 +3907,16 @@ Features added
prefix to namespace URI mapping. This will create namespace
prefix declarations on these elements and these prefixes will show up
in XML serialization.
-
+
Bugs fixed
----------
-
+
* Killed yet another memory management related bug: trees created
using newDoc would not get a libxml2-level dictionary, which caused
problems when deallocating these documents later if they contained a
node that came from a document with a dictionary.
-* Moving namespaced elements between documents was problematic as
+* Moving namespaced elements between documents was problematic as
references to the original document would remain. This has been fixed
by applying xmlReconciliateNs() after each move operation.
diff --git a/DD.py b/DD.py
index 4c524afa..542a0ff6 100644
--- a/DD.py
+++ b/DD.py
@@ -105,10 +105,10 @@ class OutcomeCache(object):
# Let K0 be the largest element in TAIL such that K0 <= C[START]
k0 = None
for k in self.tail.keys():
- if (k0 == None or k > k0) and k <= c[start]:
+ if (k0 is None or k > k0) and k <= c[start]:
k0 = k
- if k0 != None:
+ if k0 is not None:
return self.tail[k0].lookup_superset(c, start)
return None
@@ -130,20 +130,20 @@ class OutcomeCache(object):
def oc_test():
oc = OutcomeCache()
- assert oc.lookup([1, 2, 3]) == None
+ assert oc.lookup([1, 2, 3]) is None
oc.add([1, 2, 3], 4)
assert oc.lookup([1, 2, 3]) == 4
- assert oc.lookup([1, 2, 3, 4]) == None
+ assert oc.lookup([1, 2, 3, 4]) is None
- assert oc.lookup([5, 6, 7]) == None
+ assert oc.lookup([5, 6, 7]) is None
oc.add([5, 6, 7], 8)
assert oc.lookup([5, 6, 7]) == 8
- assert oc.lookup([]) == None
+ assert oc.lookup([]) is None
oc.add([], 0)
assert oc.lookup([]) == 0
- assert oc.lookup([1, 2]) == None
+ assert oc.lookup([1, 2]) is None
oc.add([1, 2], 3)
assert oc.lookup([1, 2]) == 3
assert oc.lookup([1, 2, 3]) == 4
@@ -154,21 +154,21 @@ def oc_test():
assert oc.lookup_superset([5, 6]) == 8
assert oc.lookup_superset([6, 7]) == 8
assert oc.lookup_superset([7]) == 8
- assert oc.lookup_superset([]) != None
+ assert oc.lookup_superset([]) is not None
- assert oc.lookup_superset([9]) == None
- assert oc.lookup_superset([7, 9]) == None
- assert oc.lookup_superset([-5, 1]) == None
- assert oc.lookup_superset([1, 2, 3, 9]) == None
- assert oc.lookup_superset([4, 5, 6, 7]) == None
+ assert oc.lookup_superset([9]) is None
+ assert oc.lookup_superset([7, 9]) is None
+ assert oc.lookup_superset([-5, 1]) is None
+ assert oc.lookup_superset([1, 2, 3, 9]) is None
+ assert oc.lookup_superset([4, 5, 6, 7]) is None
assert oc.lookup_subset([]) == 0
assert oc.lookup_subset([1, 2, 3]) == 4
assert oc.lookup_subset([1, 2, 3, 4]) == 4
- assert oc.lookup_subset([1, 3]) == None
+ assert oc.lookup_subset([1, 3]) is None
assert oc.lookup_subset([1, 2]) == 3
- assert oc.lookup_subset([-5, 1]) == None
+ assert oc.lookup_subset([-5, 1]) is None
assert oc.lookup_subset([-5, 1, 2]) == 3
assert oc.lookup_subset([-5]) == 0
@@ -291,7 +291,7 @@ class DD(object):
# If we had this test before, return its result
if self.cache_outcomes:
cached_result = self.outcome_cache.lookup(c)
- if cached_result != None:
+ if cached_result is not None:
return cached_result
if self.monotony:
@@ -387,7 +387,7 @@ class DD(object):
self.__resolving = 1
csubr = self.resolve(csubr, c, direction)
- if csubr == None:
+ if csubr is None:
# Nothing left to resolve
break
@@ -406,7 +406,7 @@ class DD(object):
t = self.test(csubr)
self.__resolving = 0
- if csubr == None:
+ if csubr is None:
return self.UNRESOLVED, initial_csub
# assert t == self.PASS or t == self.FAIL
@@ -447,7 +447,7 @@ class DD(object):
def _old_dd(self, c, r, n):
"""Stub to overload in subclasses"""
- if r == []:
+ if not r:
assert self.test([]) == self.PASS
assert self.test(c) == self.FAIL
else:
@@ -498,7 +498,7 @@ class DD(object):
doubled = self.__listintersect(cbar, cs[i])
- if doubled != []:
+ if doubled:
cs[i] = self.__listminus(cs[i], doubled)
@@ -553,7 +553,7 @@ class DD(object):
if self.minimize:
(t, csub) = self.test_and_resolve(csub, [], c, direction)
if t == self.FAIL:
- return (t, csub)
+ return t, csub
if self.maximize:
csubbar = self.__listminus(self.CC, csub)
@@ -575,7 +575,7 @@ class DD(object):
else:
t = self.UNRESOLVED
- return (t, csub)
+ return t, csub
# Delta Debugging (new ISSTA version)
@@ -661,7 +661,7 @@ class DD(object):
t, cbars[i] = self.test_mix(cbars[i], c, self.ADD)
doubled = self.__listintersect(cbars[i], cs[i])
- if doubled != []:
+ if doubled:
cs[i] = self.__listminus(cs[i], doubled)
if t == self.FAIL:
@@ -744,7 +744,7 @@ class DD(object):
if n > len(c):
# No further minimizing
print("dd: done")
- return (c, c1, c2)
+ return c, c1, c2
self.report_progress(c, "dd")
@@ -825,7 +825,7 @@ class DD(object):
if n >= len(c):
# No further minimizing
print("dd: done")
- return (c, c1, c2)
+ return c, c1, c2
next_n = min(len(c), n * 2)
print("dd: increase granularity to %d" % next_n)
@@ -864,7 +864,7 @@ if __name__ == '__main__':
return self.PASS
def _test_b(self, c):
- if c == []:
+ if not c:
return self.PASS
if 1 in c and 2 in c and 3 in c and 4 in c and \
5 in c and 6 in c and 7 in c and 8 in c:
diff --git a/INSTALL.txt b/INSTALL.txt
index 8508fea0..94d6a3ec 100644
--- a/INSTALL.txt
+++ b/INSTALL.txt
@@ -41,24 +41,17 @@ see below.
Requirements
------------
-You need Python 2.6 or later.
+You need Python 2.7 or 3.4+.
Unless you are using a static binary distribution (e.g. from a
Windows binary installer), lxml requires libxml2 and libxslt to
be installed, in particular:
-* `libxml2 <http://xmlsoft.org/>`_ version 2.7.0 or later.
+* `libxml2 <http://xmlsoft.org/>`_ version 2.9.2 or later.
- * We recommend libxml2 2.9.2 or a later version.
+* `libxslt <http://xmlsoft.org/XSLT/>`_ version 1.1.27 or later.
- * If you want to use the feed parser interface, especially when
- parsing from unicode strings, do not use libxml2 2.7.4 through
- 2.7.6.
-
-* `libxslt <http://xmlsoft.org/XSLT/>`_ version 1.1.23 or later.
-
- * We recommend libxslt 1.1.28 or later. Version 1.1.25 will not
- work due to a missing library symbol.
+ * We recommend libxslt 1.1.28 or later.
Newer versions generally contain fewer bugs and are therefore
recommended. XML Schema support is also still worked on in libxml2,
diff --git a/MANIFEST.in b/MANIFEST.in
index 47abd12a..73db322a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -9,7 +9,7 @@ include src/lxml/*.c src/lxml/html/*.c
recursive-include src *.pyx *.pxd *.pxi *.py
recursive-include src/lxml lxml.etree.h lxml.etree_api.h etree.h etree_api.h etree_defs.h lxml_endian.h
recursive-include src/lxml/isoschematron *.rng *.xsl *.txt
-recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd *.xsd *.sch *.html
+recursive-include src/lxml/tests *.rng *.rnc *.xslt *.xml *.dtd *.xsd *.sch *.html
recursive-include src/lxml/html/tests *.data *.txt
recursive-include samples *.xml
recursive-include benchmark *.py
diff --git a/Makefile b/Makefile
index a96133a2..0f3b3aea 100644
--- a/Makefile
+++ b/Makefile
@@ -78,7 +78,7 @@ valgrind_test_inplace: inplace
$(PYTHON) test.py
gdb_test_inplace: inplace
- @echo -e "file $(PYTHON)\nrun test.py" > .gdb.command
+ @echo "file $(PYTHON)\nrun test.py" > .gdb.command
gdb -x .gdb.command -d src -d src/lxml
bench_inplace: inplace
diff --git a/benchmark/benchbase.py b/benchmark/benchbase.py
index 6b04cb16..e34e6103 100644
--- a/benchmark/benchbase.py
+++ b/benchmark/benchbase.py
@@ -223,7 +223,7 @@ class TreeBenchMark(object):
for i in range(20 * TREE_FACTOR):
SubElement(el, tag).tail = text
t = current_time() - t
- return (root, t)
+ return root, t
def _setup_tree2(self, text, attributes):
"tree with 520 * TREE_FACTOR 2nd level and 26 3rd level children"
@@ -239,7 +239,7 @@ class TreeBenchMark(object):
for ch2 in atoz:
SubElement(el, "{cdefg}%s00001" % ch2).tail = text
t = current_time() - t
- return (root, t)
+ return root, t
def _setup_tree3(self, text, attributes):
"tree of depth 8 + TREE_FACTOR with 3 children per node"
@@ -255,7 +255,7 @@ class TreeBenchMark(object):
child.text = text
child.tail = text
t = current_time() - t
- return (root, t)
+ return root, t
def _setup_tree4(self, text, attributes):
"small tree with 26 2nd level and 2 3rd level children"
@@ -269,7 +269,7 @@ class TreeBenchMark(object):
SubElement(el, "{cdefg}a00001", attributes).tail = text
SubElement(el, "{cdefg}z00000", attributes).tail = text
t = current_time() - t
- return (root, t)
+ return root, t
def benchmarks(self):
"""Returns a list of all benchmarks.
@@ -350,7 +350,7 @@ def buildSuites(benchmark_class, etrees, selected):
if match(b[0]) ] ]
for bs in benchmarks ]
- return (benchmark_suites, benchmarks)
+ return benchmark_suites, benchmarks
def build_treeset_name(trees, tn, an, serialized, children):
text = {0:'-', 1:'S', 2:'U'}[tn]
diff --git a/buildlibxml.py b/buildlibxml.py
index e114eee7..2f5e1a19 100644
--- a/buildlibxml.py
+++ b/buildlibxml.py
@@ -137,7 +137,8 @@ def remote_listdir(url):
return _list_dir_urllib(url)
except IOError:
assert url.lower().startswith('ftp://')
- print("Requesting with urllib failed. Falling back to ftplib. Proxy argument will be ignored")
+ print("Requesting with urllib failed. Falling back to ftplib. "
+ "Proxy argument will be ignored for %s" % url)
return _list_dir_ftplib(url)
@@ -435,11 +436,8 @@ def build_libxml2xslt(download_dir, build_dir,
libxslt_configure_cmd = configure_cmd + [
'--without-python',
'--with-libxml-prefix=%s' % prefix,
- ]
- if sys.platform in ('darwin',):
- libxslt_configure_cmd += [
- '--without-crypto',
- ]
+ '--without-crypto',
+ ]
cmmi(libxslt_configure_cmd, libxslt_dir, multicore, **call_setup)
# collect build setup for lxml
@@ -460,4 +458,4 @@ def build_libxml2xslt(download_dir, build_dir,
for filename in listdir
if lib in filename and filename.endswith('.a')]
- return (xml2_config, xslt_config)
+ return xml2_config, xslt_config
diff --git a/doc/FAQ.txt b/doc/FAQ.txt
index 1c110e16..873e282a 100644
--- a/doc/FAQ.txt
+++ b/doc/FAQ.txt
@@ -218,8 +218,8 @@ not take advantage of lxml's enhanced feature set.
a query framework for XML/HTML, similar to jQuery for JavaScript
* `python-docx <http://github.com/mikemaccana/python-docx>`_,
a package for handling Microsoft's Word OpenXML format
-* `Rambler <http://beta.rambler.ru/srch?query=python+lxml&searchtype=web>`_,
- a meta search engine that aggregates different data sources
+* `Rambler <https://www.rambler.ru/>`_,
+ news aggregator on Runet
* `rdfadict <http://pypi.python.org/pypi/rdfadict>`_,
an RDFa parser with a simple dictionary-like interface.
* `xupdate-processor <http://pypi.python.org/pypi/xupdate-processor>`_,
diff --git a/doc/api.txt b/doc/api.txt
index d4f2c48f..5ebaecd3 100644
--- a/doc/api.txt
+++ b/doc/api.txt
@@ -192,8 +192,7 @@ children. Using the tree defined above, we get:
>>> [ child.tag for child in root ]
['a', 'b', 'c', 'd']
-To iterate in the opposite direction, use the builtin ``reversed()`` function
-that exists in Python 2.4 and later.
+To iterate in the opposite direction, use the builtin ``reversed()`` function.
Tree traversal should use the ``element.iter()`` method:
@@ -251,7 +250,7 @@ The most common way to traverse an XML tree is depth-first, which
traverses the tree in document order. This is implemented by the
``.iter()`` method. While there is no dedicated method for
breadth-first traversal, it is almost as simple if you use the
-``collections.deque`` type that is available in Python 2.4 and later.
+``collections.deque`` type.
.. sourcecode:: pycon
diff --git a/doc/build.txt b/doc/build.txt
index b0499e4e..8d375f7f 100644
--- a/doc/build.txt
+++ b/doc/build.txt
@@ -47,8 +47,9 @@ working Cython installation. You can use pip_ to install it::
https://github.com/lxml/lxml/blob/master/requirements.txt
-lxml currently requires at least Cython 0.20, later release versions
-should work as well.
+lxml currently requires at least Cython 0.26.1, later release versions
+should work as well. For Python 3.7 support, at least Cython 0.29 is
+required.
Github, git and hg
diff --git a/doc/intro.txt b/doc/intro.txt
index 1be3f54c..584c2f2a 100644
--- a/doc/intro.txt
+++ b/doc/intro.txt
@@ -25,7 +25,7 @@ fast, thrilling, powerful, and your code might fail in some horrible way that
you really shouldn't have to worry about when writing Python code. lxml
combines the power of libxml2 with the ease of use of Python.
-.. _`a quote by Mark Pilgrim`: http://diveintomark.org/archives/2004/02/18/libxml2
+.. _`a quote by Mark Pilgrim`: https://web.archive.org/web/20110902041836/http://diveintomark.org/archives/2004/02/18/libxml2
Aims
diff --git a/doc/lxml-source-howto.txt b/doc/lxml-source-howto.txt
index ee921fb8..327eae8c 100644
--- a/doc/lxml-source-howto.txt
+++ b/doc/lxml-source-howto.txt
@@ -154,7 +154,7 @@ lxml.etree
==========
The main module, ``lxml.etree``, is in the file `lxml.etree.pyx
-<https://github.com/lxml/lxml/blob/master/src/lxml/lxml.etree.pyx>`_. It
+<https://github.com/lxml/lxml/blob/master/src/lxml/etree.pyx>`_. It
implements the main functions and types of the ElementTree API, as
well as all the factory functions for proxies. It is the best place
to start if you want to find out how a specific feature is
@@ -303,7 +303,7 @@ lxml.objectify
A Cython implemented extension module that uses the public C-API of
lxml.etree. It provides a Python object-like interface to XML trees.
The implementation resides in the file `lxml.objectify.pyx
-<https://github.com/lxml/lxml/blob/master/src/lxml/lxml.objectify.pyx>`_.
+<https://github.com/lxml/lxml/blob/master/src/lxml/objectify.pyx>`_.
lxml.html
diff --git a/doc/main.txt b/doc/main.txt
index 46df4da5..6ac9312f 100644
--- a/doc/main.txt
+++ b/doc/main.txt
@@ -35,7 +35,7 @@ libxml2_ and libxslt_. It is unique in that it combines the speed and
XML feature completeness of these libraries with the simplicity of a
native Python API, mostly compatible but superior to the well-known
ElementTree_ API. The latest release works with all CPython versions
-from 2.6 to 3.6. See the introduction_ for more information about
+from 2.7 to 3.7. See the introduction_ for more information about
background and goals of the lxml project. Some common questions are
answered in the FAQ_.
diff --git a/doc/xpathxslt.txt b/doc/xpathxslt.txt
index 82369c66..6e159ddc 100644
--- a/doc/xpathxslt.txt
+++ b/doc/xpathxslt.txt
@@ -729,7 +729,7 @@ some ideas to try.
The most simple way to reduce the diversity is by using XSLT
parameters that you pass at call time to configure the stylesheets.
-The ``partial()`` function in the ``functools`` module of Python 2.5
+The ``partial()`` function in the ``functools`` module
may come in handy here. It allows you to bind a set of keyword
arguments (i.e. stylesheet parameters) to a reference of a callable
stylesheet. The same works for instances of the ``XPath()``
diff --git a/requirements.txt b/requirements.txt
index 16fa1b51..45327d28 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-Cython>=0.26.1
+Cython>=0.29.1
diff --git a/setup.py b/setup.py
index ce87b912..4f6f8fe2 100644
--- a/setup.py
+++ b/setup.py
@@ -7,8 +7,8 @@ import os.path
# for command line options and supported environment variables, please
# see the end of 'setupinfo.py'
-if sys.version_info < (2, 6) or sys.version_info[:2] in [(3, 0), (3, 1)]:
- print("This lxml version requires Python 2.6, 2.7, 3.2 or later.")
+if sys.version_info < (2, 7) or sys.version_info[:2] in [(3, 0), (3, 1), (3, 2), (3, 3)]:
+ print("This lxml version requires Python 2.7, 3.4 or later.")
sys.exit(1)
try:
@@ -223,10 +223,8 @@ an appropriate version of Cython installed.
'License :: OSI Approved :: BSD License',
'Programming Language :: Cython',
'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
diff --git a/setupinfo.py b/setupinfo.py
index bdac1d09..5a833d45 100644
--- a/setupinfo.py
+++ b/setupinfo.py
@@ -14,7 +14,13 @@ except ImportError:
CYTHON_INSTALLED = False
EXT_MODULES = ["lxml.etree", "lxml.objectify"]
-COMPILED_MODULES = ["lxml.builder", "lxml._elementpath", "lxml.html.diff", "lxml.html.clean"]
+COMPILED_MODULES = [
+ "lxml.builder",
+ "lxml._elementpath",
+ "lxml.html.diff",
+ "lxml.html.clean",
+ "lxml.sax",
+]
HEADER_FILES = ['etree.h', 'etree_api.h']
if hasattr(sys, 'pypy_version_info') or (
diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py
index 50bc162c..5462df6c 100644
--- a/src/lxml/_elementpath.py
+++ b/src/lxml/_elementpath.py
@@ -1,3 +1,5 @@
+# cython: language_level=2
+
#
# ElementTree
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index eb122a21..5366fcaf 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -247,7 +247,7 @@ cdef _iter_nsmap(nsmap):
if len(nsmap) <= 1:
return nsmap.items()
# nsmap will usually be a plain unordered dict => avoid type checking overhead
- if OrderedDict is not None and type(nsmap) is not dict and isinstance(nsmap, OrderedDict):
+ if type(nsmap) is not dict and isinstance(nsmap, OrderedDict):
return nsmap.items() # keep existing order
if None not in nsmap:
return sorted(nsmap.items())
@@ -273,8 +273,7 @@ cdef _iter_attrib(attrib):
# attrib will usually be a plain unordered dict
if type(attrib) is dict:
return sorted(attrib.items())
- elif isinstance(attrib, _Attrib) or (
- OrderedDict is not None and isinstance(attrib, OrderedDict)):
+ elif isinstance(attrib, (_Attrib, OrderedDict)):
return attrib.items()
else:
# assume it's an unordered mapping of some kind
@@ -1103,8 +1102,8 @@ cdef int _copyNonElementSiblings(xmlNode* c_node, xmlNode* c_target) except -1:
tree.xmlAddPrevSibling(c_target, c_copy)
c_sibling = c_sibling.next
while c_sibling.next != NULL and \
- (c_sibling.next.type == tree.XML_PI_NODE or \
- c_sibling.next.type == tree.XML_COMMENT_NODE):
+ (c_sibling.next.type == tree.XML_PI_NODE or
+ c_sibling.next.type == tree.XML_COMMENT_NODE):
c_sibling = c_sibling.next
c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
if c_copy is NULL:
@@ -1341,14 +1340,50 @@ cdef int _addSibling(_Element element, _Element sibling, bint as_next) except -1
moveNodeToDocument(element._doc, c_source_doc, c_node)
return 0
-cdef inline int isutf8(const_xmlChar* s):
+cdef inline bint isutf8(const_xmlChar* s):
cdef xmlChar c = s[0]
while c != c'\0':
if c & 0x80:
- return 1
+ return True
s += 1
c = s[0]
- return 0
+ return False
+
+cdef bint isutf8l(const_xmlChar* s, size_t length):
+ """
+ Search for non-ASCII characters in the string, knowing its length in advance.
+ """
+ cdef int i
+ cdef unsigned long non_ascii_mask
+ cdef const unsigned long *lptr = <const unsigned long*> s
+
+ cdef const unsigned long *end = lptr + length // sizeof(unsigned long)
+ if length >= sizeof(non_ascii_mask):
+ # Build constant 0x80808080... mask (and let the C compiler fold it).
+ non_ascii_mask = 0
+ for i in range(sizeof(non_ascii_mask) // 2):
+ non_ascii_mask = (non_ascii_mask << 16) | 0x8080
+
+ # Advance to long-aligned character before we start reading longs.
+ while (<size_t>s) % sizeof(unsigned long) and s < <const_xmlChar *>end:
+ if s[0] & 0x80:
+ return True
+ s += 1
+
+ # Read one long at a time
+ lptr = <const unsigned long*> s
+ while lptr < end:
+ if lptr[0] & non_ascii_mask:
+ return True
+ lptr += 1
+ s = <const_xmlChar *>lptr
+
+ while s < (<const_xmlChar *>end + length % sizeof(unsigned long)):
+ if s[0] & 0x80:
+ return True
+ s += 1
+
+ return False
cdef int _is_valid_xml_ascii(bytes pystring):
"""Check if a string is XML ascii content."""
@@ -1412,7 +1447,7 @@ cdef object funicode(const_xmlChar* s):
spos += 1
slen = spos - s
if spos[0] != c'\0':
- slen += tree.xmlStrlen(spos)
+ slen += cstring_h.strlen(<const char*> spos)
if is_non_ascii:
return s[:slen].decode('UTF-8')
return <bytes>s[:slen]
@@ -1521,7 +1556,7 @@ cdef object _encodeFilenameUTF8(object filename):
if filename is None:
return None
elif isinstance(filename, bytes):
- if not isutf8(<bytes>filename):
+ if not isutf8l(<bytes>filename, len(<bytes>filename)):
# plain ASCII!
return filename
c_filename = _cstr(<bytes>filename)
@@ -1658,7 +1693,7 @@ cdef object _namespacedNameFromNsName(const_xmlChar* href, const_xmlChar* name):
return python.PyUnicode_FromFormat("{%s}%s", href, name)
else:
s = python.PyBytes_FromFormat("{%s}%s", href, name)
- if python.IS_PYPY and (python.LXML_UNICODE_STRINGS or isutf8(_xcstr(s))):
+ if python.IS_PYPY and (python.LXML_UNICODE_STRINGS or isutf8l(s, len(s))):
return (<bytes>s).decode('utf8')
else:
return s
diff --git a/src/lxml/builder.pxd b/src/lxml/builder.pxd
index cc8a9b34..f6b2fb5f 100644
--- a/src/lxml/builder.pxd
+++ b/src/lxml/builder.pxd
@@ -1,3 +1,4 @@
+# cython: language_level=2
cdef object ET
cdef object partial
diff --git a/src/lxml/builder.py b/src/lxml/builder.py
index 832cec31..a2888456 100644
--- a/src/lxml/builder.py
+++ b/src/lxml/builder.py
@@ -1,3 +1,5 @@
+# cython: language_level=2
+
#
# Element generator factory by Fredrik Lundh.
#
diff --git a/src/lxml/classlookup.pxi b/src/lxml/classlookup.pxi
index f4f15f3f..89302251 100644
--- a/src/lxml/classlookup.pxi
+++ b/src/lxml/classlookup.pxi
@@ -196,7 +196,7 @@ cdef int _validateNodeClass(xmlNode* c_node, cls) except -1:
elif c_node.type == tree.XML_PI_NODE:
expected = PIBase
else:
- assert 0, f"Unknown node type: {c_node.type}"
+ assert False, f"Unknown node type: {c_node.type}"
if not (isinstance(cls, type) and issubclass(cls, expected)):
raise TypeError(
@@ -333,7 +333,7 @@ cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
else:
return (<ElementDefaultClassLookup>state).pi_class
else:
- assert 0, f"Unknown node type: {c_node.type}"
+ assert False, f"Unknown node type: {c_node.type}"
################################################################################
diff --git a/src/lxml/doctestcompare.py b/src/lxml/doctestcompare.py
index eb7c7f99..1b0daa49 100644
--- a/src/lxml/doctestcompare.py
+++ b/src/lxml/doctestcompare.py
@@ -209,13 +209,12 @@ class LXMLOutputChecker(OutputChecker):
else:
return value
html = parser is html_fromstring
- diff_parts = []
- diff_parts.append('Expected:')
- diff_parts.append(self.format_doc(want_doc, html, 2))
- diff_parts.append('Got:')
- diff_parts.append(self.format_doc(got_doc, html, 2))
- diff_parts.append('Diff:')
- diff_parts.append(self.collect_diff(want_doc, got_doc, html, 2))
+ diff_parts = ['Expected:',
+ self.format_doc(want_doc, html, 2),
+ 'Got:',
+ self.format_doc(got_doc, html, 2),
+ 'Diff:',
+ self.collect_diff(want_doc, got_doc, html, 2)]
return '\n'.join(diff_parts)
def html_empty_tag(self, el, html=True):
diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx
index acea9d20..3ba50798 100644
--- a/src/lxml/etree.pyx
+++ b/src/lxml/etree.pyx
@@ -1,5 +1,6 @@
# cython: binding=True
# cython: auto_pickle=False
+# cython: language_level=2
"""
The ``lxml.etree`` module implements the extended ElementTree API for XML.
@@ -65,11 +66,8 @@ from os.path import abspath as os_path_abspath
cdef object BytesIO, StringIO
from io import BytesIO, StringIO
-cdef object OrderedDict = None
-try:
- from collections import OrderedDict
-except ImportError:
- pass
+cdef object OrderedDict
+from collections import OrderedDict
cdef object _elementpath
from lxml import _elementpath
@@ -91,7 +89,7 @@ cdef object ITER_EMPTY = iter(())
try:
from collections.abc import MutableMapping # Py3.3+
except ImportError:
- from collections import MutableMapping # Py2.6+
+ from collections import MutableMapping # Py2.7
class _ImmutableMapping(MutableMapping):
def __getitem__(self, key):
@@ -388,7 +386,7 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
root_name = None
else:
root_name = funicode(c_root_node.name)
- return (root_name, public_id, sys_url)
+ return root_name, public_id, sys_url
@cython.final
cdef getxmlinfo(self):
@@ -402,7 +400,7 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
encoding = None
else:
encoding = funicode(c_doc.encoding)
- return (version, encoding)
+ return version, encoding
@cython.final
cdef isstandalone(self):
@@ -3277,9 +3275,9 @@ def tostring(element_or_tree, *, encoding=None, method="xml",
declaration by default.
You can also serialise to a Unicode string without declaration by
- passing the ``unicode`` function as encoding (or ``str`` in Py3),
- or the name 'unicode'. This changes the return value from a byte
- string to an unencoded unicode string.
+ passing the name ``'unicode'`` as encoding (or the ``str`` function
+ in Py3 or ``unicode`` in Py2). This changes the return value from
+ a byte string to an unencoded unicode string.
The keyword argument 'pretty_print' (bool) enables formatted XML.
@@ -3437,7 +3435,6 @@ def adopt_external_document(capsule, _BaseParser parser=None):
This allows external libraries to build XML/HTML trees using libxml2
and then pass them efficiently into lxml for further processing.
- Requires Python 2.7 or later.
If a ``parser`` is provided, it will be used for configuring the
lxml document. No parsing will be done.
@@ -3461,9 +3458,6 @@ def adopt_external_document(capsule, _BaseParser parser=None):
If no copy is made, later modifications of the tree outside of lxml
should not be attempted after transferring the ownership.
"""
- if python.PY_VERSION_HEX < 0x02070000:
- raise NotImplementedError("PyCapsule usage requires Python 2.7+")
-
cdef xmlDoc* c_doc
cdef bint is_owned = False
c_doc = <xmlDoc*> python.lxml_unpack_xmldoc_capsule(capsule, &is_owned)
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index 84359b67..aa9fc57f 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -1,3 +1,5 @@
+# cython: language_level=2
+
"""A cleanup tool for HTML.
Removes unwanted tags and content. See the `Cleaner` class for
@@ -30,11 +32,6 @@ except NameError:
# Python 3
unicode = str
try:
- bytes
-except NameError:
- # Python < 2.6
- bytes = str
-try:
basestring
except NameError:
basestring = (str, bytes)
@@ -215,7 +212,7 @@ class Cleaner(object):
safe_attrs = defs.safe_attrs
add_nofollow = False
host_whitelist = ()
- whitelist_tags = set(['iframe', 'embed'])
+ whitelist_tags = {'iframe', 'embed'}
def __init__(self, **kw):
for name, value in kw.items():
diff --git a/src/lxml/html/diff.py b/src/lxml/html/diff.py
index 57bc3148..5d143bd2 100644
--- a/src/lxml/html/diff.py
+++ b/src/lxml/html/diff.py
@@ -1,3 +1,5 @@
+# cython: language_level=3
+
from __future__ import absolute_import
import difflib
@@ -623,7 +625,7 @@ def fixup_chunks(chunks):
% (cur_word, result, chunk, chunks))
cur_word.post_tags.append(chunk)
else:
- assert(0)
+ assert False
if not result:
return [token('', pre_tags=tag_accum)]
@@ -801,7 +803,6 @@ def _move_el_inside_block(el, tag):
if _contains_block_level_tag(child):
break
else:
- import sys
# No block-level tags in any child
children_tag = etree.Element(tag)
children_tag.text = el.text
diff --git a/src/lxml/html/tests/test_autolink.py b/src/lxml/html/tests/test_autolink.py
index 61b474ce..7a782be9 100644
--- a/src/lxml/html/tests/test_autolink.py
+++ b/src/lxml/html/tests/test_autolink.py
@@ -1,10 +1,9 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([make_doctest('test_autolink.txt')])
+ suite.addTests([make_doctest('test_autolink.txt')])
return suite
if __name__ == '__main__':
diff --git a/src/lxml/html/tests/test_basic.py b/src/lxml/html/tests/test_basic.py
index fd4896a7..6e35c274 100644
--- a/src/lxml/html/tests/test_basic.py
+++ b/src/lxml/html/tests/test_basic.py
@@ -1,11 +1,10 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest, doctest
import lxml.html
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([make_doctest('test_basic.txt')])
+ suite.addTests([make_doctest('test_basic.txt')])
suite.addTests([doctest.DocTestSuite(lxml.html)])
return suite
diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py
index 3bcaaf5a..a193d994 100644
--- a/src/lxml/html/tests/test_clean.py
+++ b/src/lxml/html/tests/test_clean.py
@@ -1,6 +1,5 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest
-from lxml.etree import LIBXML_VERSION
import lxml.html
from lxml.html.clean import Cleaner, clean_html
@@ -72,9 +71,7 @@ class CleanerTest(unittest.TestCase):
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([make_doctest('test_clean.txt')])
- if LIBXML_VERSION >= (2,6,31):
- suite.addTests([make_doctest('test_clean_embed.txt')])
+ suite.addTests([make_doctest('test_clean.txt')])
+ suite.addTests([make_doctest('test_clean_embed.txt')])
suite.addTests(unittest.makeSuite(CleanerTest))
return suite
diff --git a/src/lxml/html/tests/test_diff.py b/src/lxml/html/tests/test_diff.py
index f1fba4bc..c1adbd67 100644
--- a/src/lxml/html/tests/test_diff.py
+++ b/src/lxml/html/tests/test_diff.py
@@ -1,13 +1,12 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest, doctest
from lxml.html import diff
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([make_doctest('test_diff.txt'),
- doctest.DocTestSuite(diff)])
+ suite.addTests([make_doctest('test_diff.txt'),
+ doctest.DocTestSuite(diff)])
return suite
if __name__ == '__main__':
diff --git a/src/lxml/html/tests/test_feedparser_data.py b/src/lxml/html/tests/test_feedparser_data.py
index eaf8c29e..29a500ff 100644
--- a/src/lxml/html/tests/test_feedparser_data.py
+++ b/src/lxml/html/tests/test_feedparser_data.py
@@ -1,4 +1,3 @@
-import sys
import os
import re
try:
@@ -8,8 +7,7 @@ except ImportError:
from email import message_from_file as Message
import unittest
from lxml.tests.common_imports import doctest
-if sys.version_info >= (2,4):
- from lxml.doctestcompare import LHTMLOutputChecker
+from lxml.doctestcompare import LHTMLOutputChecker
from lxml.html.clean import clean, Cleaner
@@ -83,16 +81,15 @@ class FeedTestCase(unittest.TestCase):
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- for dir in feed_dirs:
- for fn in os.listdir(dir):
- fn = os.path.join(dir, fn)
- if fn.endswith('.data'):
- case = FeedTestCase(fn)
- suite.addTests([case])
- # This is my lazy way of stopping on first error:
- try:
- case.runTest()
- except:
- break
+ for dir in feed_dirs:
+ for fn in os.listdir(dir):
+ fn = os.path.join(dir, fn)
+ if fn.endswith('.data'):
+ case = FeedTestCase(fn)
+ suite.addTests([case])
+ # This is my lazy way of stopping on first error:
+ try:
+ case.runTest()
+ except:
+ break
return suite
diff --git a/src/lxml/html/tests/test_formfill.py b/src/lxml/html/tests/test_formfill.py
index 7893c20b..0f535186 100644
--- a/src/lxml/html/tests/test_formfill.py
+++ b/src/lxml/html/tests/test_formfill.py
@@ -1,8 +1,7 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([make_doctest('test_formfill.txt')])
+ suite.addTests([make_doctest('test_formfill.txt')])
return suite
diff --git a/src/lxml/html/tests/test_forms.py b/src/lxml/html/tests/test_forms.py
index e8b00c4d..37a0327f 100644
--- a/src/lxml/html/tests/test_forms.py
+++ b/src/lxml/html/tests/test_forms.py
@@ -1,10 +1,9 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([make_doctest('test_forms.txt')])
+ suite.addTests([make_doctest('test_forms.txt')])
return suite
if __name__ == '__main__':
diff --git a/src/lxml/html/tests/test_html5parser.py b/src/lxml/html/tests/test_html5parser.py
index 241517ea..56afe98b 100644
--- a/src/lxml/html/tests/test_html5parser.py
+++ b/src/lxml/html/tests/test_html5parser.py
@@ -7,23 +7,7 @@ except ImportError: # python 3
import sys
import tempfile
import unittest
-try:
- from unittest import skipUnless
-except ImportError:
- # sys.version < (2, 7)
- def skipUnless(condition, reason):
- return lambda f: condition and f or None
-
-if sys.version_info < (2,6):
- class NamedTemporaryFile(object):
- def __init__(self, delete=True, **kwargs):
- self._tmpfile = tempfile.NamedTemporaryFile(**kwargs)
- def close(self):
- self._tmpfile.flush()
- def __getattr__(self, name):
- return getattr(self._tmpfile, name)
-else:
- NamedTemporaryFile = tempfile.NamedTemporaryFile
+from unittest import skipUnless
from lxml.builder import ElementMaker
from lxml.etree import Element, ElementTree, ParserError
@@ -318,7 +302,7 @@ class Test_parse(unittest.TestCase):
return parse(*args, **kwargs)
def make_temp_file(self, contents=''):
- tmpfile = NamedTemporaryFile(delete=False)
+ tmpfile = tempfile.NamedTemporaryFile(delete=False)
try:
tmpfile.write(contents.encode('utf8'))
tmpfile.flush()
diff --git a/src/lxml/html/tests/test_rewritelinks.py b/src/lxml/html/tests/test_rewritelinks.py
index b4653234..100105fa 100644
--- a/src/lxml/html/tests/test_rewritelinks.py
+++ b/src/lxml/html/tests/test_rewritelinks.py
@@ -1,10 +1,9 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([make_doctest('test_rewritelinks.txt')])
+ suite.addTests([make_doctest('test_rewritelinks.txt')])
return suite
if __name__ == '__main__':
diff --git a/src/lxml/html/tests/test_select.py b/src/lxml/html/tests/test_select.py
index 40888ef7..499ff7d5 100644
--- a/src/lxml/html/tests/test_select.py
+++ b/src/lxml/html/tests/test_select.py
@@ -39,7 +39,7 @@ class SelectTest(unittest.TestCase):
def test_multiple_select_value_multiple_selected_options(self):
self.assertEqual(
self._evaluate_select([('a', True), ('b', True)], multiple=True),
- set(['a', 'b']))
+ {'a', 'b'})
def test_suite():
diff --git a/src/lxml/html/tests/test_xhtml.py b/src/lxml/html/tests/test_xhtml.py
index dc34aa70..cc66170d 100644
--- a/src/lxml/html/tests/test_xhtml.py
+++ b/src/lxml/html/tests/test_xhtml.py
@@ -1,6 +1,5 @@
-import unittest, sys
+import unittest
from lxml.tests.common_imports import make_doctest
-import lxml.html
def test_suite():
suite = unittest.TestSuite()
diff --git a/src/lxml/html/tests/transform_feedparser_data.py b/src/lxml/html/tests/transform_feedparser_data.py
index d340912b..38ced243 100644
--- a/src/lxml/html/tests/transform_feedparser_data.py
+++ b/src/lxml/html/tests/transform_feedparser_data.py
@@ -105,6 +105,5 @@ def translate_all(dir):
translate_file(fn)
if __name__ == '__main__':
- import sys
translate_all(os.path.join(os.path.dirname(__file__), 'feedparser-data'))
diff --git a/src/lxml/includes/etree_defs.h b/src/lxml/includes/etree_defs.h
index f935a79e..ccf35a59 100644
--- a/src/lxml/includes/etree_defs.h
+++ b/src/lxml/includes/etree_defs.h
@@ -6,8 +6,8 @@
#ifndef PY_VERSION_HEX
# error the development package of Python (header files etc.) is not installed correctly
#else
-# if PY_VERSION_HEX < 0x02060000 || PY_MAJOR_VERSION >= 3 && PY_VERSION_HEX < 0x03020000
-# error this version of lxml requires Python 2.6, 2.7, 3.2 or later
+# if PY_VERSION_HEX < 0x02070000 || PY_MAJOR_VERSION >= 3 && PY_VERSION_HEX < 0x03030000
+# error this version of lxml requires Python 2.7, 3.3 or later
# endif
#endif
@@ -262,8 +262,6 @@ long _ftol2( double dblSource ) { return _ftol( dblSource ); }
(((c_node)->ns == 0) ? 0 : ((c_node)->ns->href))
-/* PyCapsule was added in Py2.7 */
-#if PY_VERSION_HEX >= 0x02070000
#include "string.h"
static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) {
xmlDoc *c_doc;
@@ -301,9 +299,6 @@ static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) {
}
return c_doc;
}
-#else
-# define lxml_unpack_xmldoc_capsule(capsule, is_owned) ((((void)capsule, 0) || ((void)is_owned, 0)) ? NULL : NULL)
-#endif
/* Macro pair implementation of a depth first tree walker
*
diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py
index e66f6a10..5967b109 100644
--- a/src/lxml/isoschematron/__init__.py
+++ b/src/lxml/isoschematron/__init__.py
@@ -63,8 +63,8 @@ svrl_validation_errors = _etree.XPath(
# RelaxNG validator for schematron schemas
-schematron_schema_valid = _etree.RelaxNG(_etree.parse(
- os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')))
+schematron_schema_valid = _etree.RelaxNG(
+ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
def stylesheet_params(**kwargs):
diff --git a/src/lxml/isoschematron/resources/rng/iso-schematron.rng b/src/lxml/isoschematron/resources/rng/iso-schematron.rng
index d822f0d6..a4f504af 100644
--- a/src/lxml/isoschematron/resources/rng/iso-schematron.rng
+++ b/src/lxml/isoschematron/resources/rng/iso-schematron.rng
@@ -1,9 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright © ISO/IEC 2015 -->
<!--
- (c) International Organization for Standardization 2005.
- Permission to copy in any form is granted for use with conforming
- SGML systems and applications as defined in ISO 8879,
- provided this notice is included in all copies.
+ The following permission notice and disclaimer shall be included in all
+ copies of this XML schema ("the Schema"), and derivations of the Schema:
+
+ Permission is hereby granted, free of charge in perpetuity, to any
+ person obtaining a copy of the Schema, to use, copy, modify, merge and
+ distribute free of charge, copies of the Schema for the purposes of
+ developing, implementing, installing and using software based on the
+ Schema, and to permit persons to whom the Schema is furnished to do so,
+ subject to the following conditions:
+
+ THE SCHEMA IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SCHEMA OR THE USE OR
+ OTHER DEALINGS IN THE SCHEMA.
+
+ In addition, any modified copy of the Schema shall include the following
+ notice:
+
+ "THIS SCHEMA HAS BEEN MODIFIED FROM THE SCHEMA DEFINED IN ISO/IEC 19757-3,
+ AND SHOULD NOT BE INTERPRETED AS COMPLYING WITH THAT STANDARD".
-->
<grammar ns="http://purl.oclc.org/dsdl/schematron" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
<start>
@@ -63,6 +83,10 @@
<optional>
<ref name="diagnostics"/>
</optional>
+ <optional>
+ <!-- edited (lxml): required in standard, optional here (since it can be empty anyway) -->
+ <ref name="properties"/>
+ </optional>
</group>
</interleave>
</element>
@@ -105,6 +129,11 @@
<data type="IDREFS"/>
</attribute>
</optional>
+ <optional>
+ <attribute name="properties">
+ <data type="IDREFS"/>
+ </attribute>
+ </optional>
<ref name="rich"/>
<ref name="linkable"/>
<interleave>
@@ -178,9 +207,14 @@
</define>
<define name="extends">
<element name="extends">
- <attribute name="rule">
- <data type="IDREF"/>
- </attribute>
+ <choice>
+ <attribute name="rule">
+ <data type="IDREF"/>
+ </attribute>
+ <attribute name="href">
+ <ref name="uriValue"/>
+ </attribute>
+ </choice>
<ref name="foreign-empty"/>
</element>
</define>
@@ -189,9 +223,14 @@
<attribute name="name">
<ref name="nameValue"/>
</attribute>
- <attribute name="value">
- <data type="string" datatypeLibrary=""/>
- </attribute>
+ <choice>
+ <attribute name="value">
+ <data type="string" datatypeLibrary=""/>
+ </attribute>
+ <oneOrMore>
+ <ref name="foreign-element"/>
+ </oneOrMore>
+ </choice>
</element>
</define>
<define name="name">
@@ -257,6 +296,11 @@
</define>
<define name="pattern">
<element name="pattern">
+ <optional>
+ <attribute name="documents">
+ <ref name="pathValue"/>
+ </attribute>
+ </optional>
<ref name="rich"/>
<interleave>
<ref name="foreign"/>
@@ -367,6 +411,41 @@
</interleave>
</element>
</define>
+ <define name="properties">
+ <element name="properties">
+ <zeroOrMore>
+ <ref name="property"/>
+ </zeroOrMore>
+ </element>
+ </define>
+ <define name="property">
+ <element name="property">
+ <attribute name="id">
+ <data type="ID"/>
+ </attribute>
+ <optional>
+ <attribute name="role">
+ <ref name="roleValue"/>
+ </attribute>
+ </optional>
+ <optional>
+ <attribute name="scheme"/>
+ </optional>
+ <interleave>
+ <ref name="foreign"/>
+ <zeroOrMore>
+ <choice>
+ <text/>
+ <ref name="name"/>
+ <ref name="value-of"/>
+ <ref name="emph"/>
+ <ref name="dir"/>
+ <ref name="span"/>
+ </choice>
+ </zeroOrMore>
+ </interleave>
+ </element>
+ </define>
<define name="report">
<element name="report">
<attribute name="test">
@@ -387,6 +466,11 @@
<data type="IDREFS"/>
</attribute>
</optional>
+ <optional>
+ <attribute name="properties">
+ <data type="IDREFS"/>
+ </attribute>
+ </optional>
<ref name="rich"/>
<ref name="linkable"/>
<interleave>
@@ -434,6 +518,7 @@
<ref name="assert"/>
<ref name="report"/>
<ref name="extends"/>
+ <ref name="p"/>
</choice>
</oneOrMore>
</group>
@@ -459,6 +544,7 @@
<ref name="assert"/>
<ref name="report"/>
<ref name="extends"/>
+ <ref name="p"/>
</choice>
</oneOrMore>
</group>
@@ -501,6 +587,7 @@
<attribute name="href">
<ref name="uriValue"/>
</attribute>
+ <ref name="foreign-empty"/>
</element>
</define>
<define name="rich">
diff --git a/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl b/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl
index 057c7c1f..50183952 100644
--- a/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl
+++ b/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl
@@ -6,10 +6,11 @@
This is a preprocessor for ISO Schematron, which implements abstract patterns.
It also
* extracts a particular schema using an ID, where there are multiple
- schemas, such as when they are embedded in the same NVDL script
- * experimentally, allows parameter recognition and substitution inside
- text as well as @context, @test, & @select.
-
+ schemas, such as when they are embedded in the same NVDL script
+ * allows parameter substitution inside @context, @test, @select, @path
+ * experimentally, allows parameter recognition and substitution inside
+ text (NOTE: to be removed, for compatibility with other implementations,
+ please do not use this)
This should be used after iso-dsdl-include.xsl and before the skeleton or
meta-stylesheet (e.g. iso-svrl.xsl) . It only requires XSLT 1.
@@ -17,8 +18,45 @@
Each kind of inclusion can be turned off (or on) on the command line.
-->
-<!--
- VERSION INFORMATION
+
+<!--
+Open Source Initiative OSI - The MIT License:Licensing
+[OSI Approved License]
+
+This source code was previously available under the zlib/libpng license.
+Attribution is polite.
+
+The MIT License
+
+Copyright (c) 2004-2010 Rick Jellife and Academia Sinica Computing Centre, Taiwan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+-->
+
+<!--
+VERSION INFORMATION
+ 2013-09-19 RJ
+ * Allow macro expansion in @path attributes, eg. for sch:name/@path
+
+ 2010-07-10 RJ
+ * Move to MIT license
+
2008-09-18 RJ
* move out param test from iso:schema template to work with XSLT 1. (Noah Fontes)
@@ -40,35 +78,11 @@
* Original written for old namespace
* http://www.topologi.com/resources/iso-pre-pro.xsl
-->
-<!--
- LEGAL INFORMATION
-
- Copyright (c) 2000-2008 Rick Jelliffe and Academia Sinica Computing Center, Taiwan
-
- This software is provided 'as-is', without any express or implied warranty.
- In no event will the authors be held liable for any damages arising from
- the use of this software.
-
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it freely,
- subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you must not claim
- that you wrote the original software. If you use this software in a product,
- an acknowledgment in the product documentation would be appreciated but is
- not required.
-
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
-
- 3. This notice may not be removed or altered from any source distribution.
--->
<xslt:stylesheet version="1.0" xmlns:xslt="http://www.w3.org/1999/XSL/Transform"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:iso="http://purl.oclc.org/dsdl/schematron"
xmlns:nvdl="http://purl.oclc.org/dsdl/nvdl"
-
xmlns:iae="http://www.schematron.com/namespace/iae"
>
@@ -231,7 +245,7 @@
<xslt:template mode="iae:do-pattern" match="*">
<xslt:param name="caller"/>
<xslt:copy>
- <xslt:for-each select="@*[name()='test' or name()='context' or name()='select']">
+ <xslt:for-each select="@*[name()='test' or name()='context' or name()='select' or name()='path' ]">
<xslt:attribute name="{name()}">
<xslt:call-template name="iae:macro-expand">
<xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param>
@@ -239,12 +253,13 @@
</xslt:call-template>
</xslt:attribute>
</xslt:for-each>
- <xslt:copy-of select="@*[name()!='test'][name()!='context'][name()!='select']" />
+ <xslt:copy-of select="@*[name()!='test'][name()!='context'][name()!='select'][name()!='path']" />
<xsl:for-each select="node()">
<xsl:choose>
<!-- Experiment: replace macros in text as well, to allow parameterized assertions
and so on, without having to have spurious <iso:value-of> calls and multiple
- delimiting -->
+ delimiting.
+ NOTE: THIS FUNCTIONALITY WILL BE REMOVED IN THE FUTURE -->
<xsl:when test="self::text()">
<xslt:call-template name="iae:macro-expand">
<xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param>
@@ -293,4 +308,6 @@
</xsl:choose>
</xslt:template>
+
+
</xslt:stylesheet> \ No newline at end of file
diff --git a/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt b/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
index d9f68c5a..e5d6dfcd 100644
--- a/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
+++ b/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
@@ -1,83 +1,84 @@
-ISO SCHEMATRON 2009
-
-XSLT implementation by Rick Jelliffe with assistance from members of Schematron-love-in maillist.
-
-2009-03-18
-
-Two distributions are available. One is for XSLT1 engines.
-The other is for XSLT2 engines, such as SAXON 9.
-
-
-This version of Schematron splits the process into a pipeline of several different XSLT stages.
-
-1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.
-This is a macro processor to assemble the schema from various parts.
-If your schema is not in separate parts, you can skip this stage.
-
-2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.
-This is a macro processor to convert abstract patterns to real patterns.
-If your schema does not use abstract patterns, you can skip this
-stage.
-
-3) Third, compile the Schematron schema into an XSLT script.
-This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl
-(which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl)
-However, other "meta-styleseets" are also in common use; the principle of operation is the same.
-If your schema uses Schematron phases, supply these as command line/invocation parameters
-to this process.
-
-4) Fourth, run the script generated by stage 3 against the document being validated.
-If you are using the SVRL script, then the output of validation will be an XML document.
-If your schema uses Schematron parameters, supply these as command line/invocation parameters
-to this process.
-
-
-The XSLT2 distribution also features several next generation features,
-such as validating multiple documents. See the source code for details.
-
-Schematron assertions can be written in any language, of course; the file
-sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
-in English, and this can be used as template to localize the skeleton's
-error messages. Note that typically programming errors in Schematron are XPath
-errors, which requires localized messages from the XSLT engine.
-
-ANT
----
-To give an example of how to process a document, here is a sample ANT task.
-
-<target name="schematron-compile-test" >
-
- <!-- expand inclusions -->
- <xslt basedir="test/schematron"
- style="iso_dsdl_include.xsl" in="test.sch" out="test1.sch">
- <classpath>
- <pathelement location="${lib.dir}/saxon9.jar"/>
- </classpath>
- </xslt>
-
- <!-- expand abstract patterns -->
- <xslt basedir="test/schematron"
- style="iso_abstract_expand.xsl" in="test1.sch" out="test2.sch">
- <classpath>
- <pathelement location="${lib.dir}/saxon9.jar"/>
- </classpath>
- </xslt>
-
-
-
- <!-- compile it -->
- <xslt basedir="test/schematron"
- style="iso_svrl_for_xslt2.xsl" in="test2.sch" out="test.xsl">
- <classpath>
- <pathelement location="${lib.dir}/saxon9.jar"/>
- </classpath>
- </xslt>
-
- <!-- validate -->
- <xslt basedir="test/schematron"
- style="test.xsl" in="instance.xml" out="instance.svrlt">
- <classpath>
- <pathelement location="${lib.dir}/saxon9.jar"/>
- </classpath>
- </xslt>
- </target> \ No newline at end of file
+ISO SCHEMATRON 2010
+
+XSLT implementation by Rick Jelliffe with assistance from members of Schematron-love-in maillist.
+
+2010-04-21
+
+Two distributions are available. One is for XSLT1 engines.
+The other is for XSLT2 engines, such as SAXON 9.
+
+
+This version of Schematron splits the process into a pipeline of several different XSLT stages.
+
+1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.
+This is a macro processor to assemble the schema from various parts.
+If your schema is not in separate parts, you can skip this stage.
+This stage also generates error messages for some common XPath syntax problems.
+
+2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.
+This is a macro processor to convert abstract patterns to real patterns.
+If your schema does not use abstract patterns, you can skip this
+stage.
+
+3) Third, compile the Schematron schema into an XSLT script.
+This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl
+(which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl)
+However, other "meta-stylesheets" are also in common use; the principle of operation is the same.
+If your schema uses Schematron phases, supply these as command line/invocation parameters
+to this process.
+
+4) Fourth, run the script generated by stage 3 against the document being validated.
+If you are using the SVRL script, then the output of validation will be an XML document.
+If your schema uses Schematron parameters, supply these as command line/invocation parameters
+to this process.
+
+
+The XSLT2 distribution also features several next generation features,
+such as validating multiple documents. See the source code for details.
+
+Schematron assertions can be written in any language, of course; the file
+sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
+in English, and this can be used as template to localize the skeleton's
+error messages. Note that typically programming errors in Schematron are XPath
+errors, which requires localized messages from the XSLT engine.
+
+ANT
+---
+To give an example of how to process a document, here is a sample ANT task.
+
+<target name="schematron-compile-test" >
+
+ <!-- expand inclusions -->
+ <xslt basedir="test/schematron"
+ style="iso_dsdl_include.xsl" in="test.sch" out="test1.sch">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+
+ <!-- expand abstract patterns -->
+ <xslt basedir="test/schematron"
+ style="iso_abstract_expand.xsl" in="test1.sch" out="test2.sch">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+
+
+
+ <!-- compile it -->
+ <xslt basedir="test/schematron"
+ style="iso_svrl_for_xslt2.xsl" in="test2.sch" out="test.xsl">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+
+ <!-- validate -->
+ <xslt basedir="test/schematron"
+ style="test.xsl" in="instance.xml" out="instance.svrlt">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+ </target>
diff --git a/src/lxml/objectify.pyx b/src/lxml/objectify.pyx
index 369ff8f8..f5204e6c 100644
--- a/src/lxml/objectify.pyx
+++ b/src/lxml/objectify.pyx
@@ -1,5 +1,6 @@
# cython: binding=True
# cython: auto_pickle=False
+# cython: language_level=2
"""
The ``lxml.objectify`` module implements a Python object API for XML.
@@ -76,7 +77,7 @@ PYTYPE_ATTRIBUTE = None
cdef unicode TREE_PYTYPE_NAME = u"TREE"
cdef tuple _unicodeAndUtf8(s):
- return (s, python.PyUnicode_AsUTF8String(s))
+ return s, python.PyUnicode_AsUTF8String(s)
def set_pytype_attribute_tag(attribute_tag=None):
u"""set_pytype_attribute_tag(attribute_tag=None)
@@ -159,7 +160,7 @@ cdef class ObjectifiedElement(ElementBase):
# pickle support for objectified Element
def __reduce__(self):
- return (fromstring, (etree.tostring(self),))
+ return fromstring, (etree.tostring(self),)
property text:
def __get__(self):
@@ -1359,7 +1360,7 @@ cdef _setupPickle(elementTreeReduceFunction):
elementTreeReduceFunction, __unpickleElementTree)
def pickleReduceElementTree(obj):
- return (__unpickleElementTree, (etree.tostring(obj),))
+ return __unpickleElementTree, (etree.tostring(obj),)
_setupPickle(pickleReduceElementTree)
del pickleReduceElementTree
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index bcf4da6f..f6f4fe6d 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -628,10 +628,10 @@ cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
<bytes>filename, len(<bytes>filename))
if ctxt.lastError.message is not NULL:
try:
- message = (ctxt.lastError.message).decode('utf-8')
+ message = ctxt.lastError.message.decode('utf-8')
except UnicodeDecodeError:
# the filename may be in there => play it safe
- message = (ctxt.lastError.message).decode('iso8859-1')
+ message = ctxt.lastError.message.decode('iso8859-1')
message = f"Error reading file '{filename}': {message.strip()}"
else:
message = f"Error reading '{filename}'"
@@ -640,7 +640,7 @@ cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
raise error_log._buildParseException(
XMLSyntaxError, u"Document is not well formed")
elif ctxt.lastError.message is not NULL:
- message = (ctxt.lastError.message).strip()
+ message = ctxt.lastError.message.strip()
code = ctxt.lastError.code
line = ctxt.lastError.line
column = ctxt.lastError.int2
diff --git a/src/lxml/python.pxd b/src/lxml/python.pxd
index 5eb9271c..0d26cdd5 100644
--- a/src/lxml/python.pxd
+++ b/src/lxml/python.pxd
@@ -29,7 +29,7 @@ cdef extern from "Python.h":
char* encoding, char* errors)
cdef cython.unicode PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors)
cdef cython.unicode PyUnicode_DecodeLatin1(char* s, Py_ssize_t size, char* errors)
- cdef object PyUnicode_RichCompare(object o1, object o2, int op) # not in Py2.4
+ cdef object PyUnicode_RichCompare(object o1, object o2, int op)
cdef bytes PyUnicode_AsUTF8String(object ustring)
cdef bytes PyUnicode_AsASCIIString(object ustring)
cdef char* PyUnicode_AS_DATA(object ustring)
diff --git a/src/lxml/sax.pxd b/src/lxml/sax.pxd
new file mode 100644
index 00000000..b1b7d2ad
--- /dev/null
+++ b/src/lxml/sax.pxd
@@ -0,0 +1,16 @@
+# cython: language_level=2
+
+cimport cython
+
+cdef tuple _getNsTag(tag)
+
+cdef class ElementTreeProducer:
+ cdef _element
+ cdef _content_handler
+ cdef _attr_class
+ cdef _empty_attributes
+
+ @cython.locals(element_nsmap=dict)
+ cdef inline _recursive_saxify(self, element, dict parent_nsmap)
+
+ cdef inline _build_qname(self, ns_uri, local_name, dict nsmap, preferred_prefix, bint is_attribute)
diff --git a/src/lxml/sax.py b/src/lxml/sax.py
index 01147513..299c235e 100644
--- a/src/lxml/sax.py
+++ b/src/lxml/sax.py
@@ -1,3 +1,5 @@
+# cython: language_level=2
+
"""
SAX-based adapter to copy trees from/to the Python standard library.
@@ -27,7 +29,7 @@ def _getNsTag(tag):
if tag[0] == '{':
return tuple(tag[1:].split('}', 1))
else:
- return (None, tag)
+ return None, tag
class ElementTreeContentHandler(ContentHandler):
@@ -191,19 +193,26 @@ class ElementTreeProducer(object):
self._content_handler.endDocument()
- def _recursive_saxify(self, element, prefixes):
+ def _recursive_saxify(self, element, parent_nsmap):
content_handler = self._content_handler
tag = element.tag
if tag is Comment or tag is ProcessingInstruction:
if tag is ProcessingInstruction:
content_handler.processingInstruction(
element.target, element.text)
- if element.tail:
- content_handler.characters(element.tail)
+ tail = element.tail
+ if tail:
+ content_handler.characters(tail)
return
+ element_nsmap = element.nsmap
new_prefixes = []
- build_qname = self._build_qname
+ if element_nsmap != parent_nsmap:
+ # There have been updates to the namespace
+ for prefix, ns_uri in element_nsmap.items():
+ if parent_nsmap.get(prefix) != ns_uri:
+ new_prefixes.append( (prefix, ns_uri) )
+
attribs = element.items()
if attribs:
attr_values = {}
@@ -211,39 +220,57 @@ class ElementTreeProducer(object):
for attr_ns_name, value in attribs:
attr_ns_tuple = _getNsTag(attr_ns_name)
attr_values[attr_ns_tuple] = value
- attr_qnames[attr_ns_tuple] = build_qname(
- attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
+ attr_qnames[attr_ns_tuple] = self._build_qname(
+ attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap,
+ preferred_prefix=None, is_attribute=True)
sax_attributes = self._attr_class(attr_values, attr_qnames)
else:
sax_attributes = self._empty_attributes
ns_uri, local_name = _getNsTag(tag)
- qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)
+ qname = self._build_qname(
+ ns_uri, local_name, element_nsmap, element.prefix, is_attribute=False)
for prefix, uri in new_prefixes:
content_handler.startPrefixMapping(prefix, uri)
- content_handler.startElementNS((ns_uri, local_name),
- qname, sax_attributes)
- if element.text:
- content_handler.characters(element.text)
+ content_handler.startElementNS(
+ (ns_uri, local_name), qname, sax_attributes)
+ text = element.text
+ if text:
+ content_handler.characters(text)
for child in element:
- self._recursive_saxify(child, prefixes)
+ self._recursive_saxify(child, element_nsmap)
content_handler.endElementNS((ns_uri, local_name), qname)
for prefix, uri in new_prefixes:
content_handler.endPrefixMapping(prefix)
- if element.tail:
- content_handler.characters(element.tail)
+ tail = element.tail
+ if tail:
+ content_handler.characters(tail)
- def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
+ def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute):
if ns_uri is None:
return local_name
- try:
- prefix = prefixes[ns_uri]
- except KeyError:
- prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
- new_prefixes.append( (prefix, ns_uri) )
+
+ if not is_attribute and nsmap.get(preferred_prefix) == ns_uri:
+ prefix = preferred_prefix
+ else:
+ # Pick the first matching prefix, in alphabetical order.
+ candidates = [
+ pfx for (pfx, uri) in nsmap.items()
+ if pfx is not None and uri == ns_uri
+ ]
+ prefix = (
+ candidates[0] if len(candidates) == 1
+ else min(candidates) if candidates
+ else None
+ )
+
+ if prefix is None:
+ # Default namespace
+ return local_name
return prefix + ':' + local_name
+
def saxify(element_or_tree, content_handler):
"""One-shot helper to generate SAX events from an XML tree and fire
them against a SAX ContentHandler.
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi
index f53c323b..3c70258a 100644
--- a/src/lxml/serializer.pxi
+++ b/src/lxml/serializer.pxi
@@ -61,7 +61,7 @@ cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
encoding = encoding.lower()
if encoding not in (u'utf8', u'utf-8'):
if encoding == u'ascii':
- if isutf8(c_text):
+ if isutf8l(c_text, tree.xmlBufferLength(c_buffer)):
# will raise a decode error below
needs_conversion = 1
else:
@@ -418,15 +418,15 @@ cdef unsigned char *xmlSerializeHexCharRef(unsigned char *out, int val):
out[0] = 'x'
out += 1
- if (val < 0x10):
+ if val < 0x10:
ptr = out
- elif (val < 0x100):
+ elif val < 0x100:
ptr = out + 1
- elif (val < 0x1000):
+ elif val < 0x1000:
ptr = out + 2
- elif (val < 0x10000):
+ elif val < 0x10000:
ptr = out + 3
- elif (val < 0x100000):
+ elif val < 0x100000:
ptr = out + 4
else:
ptr = out + 5
@@ -495,56 +495,56 @@ cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
return
base = cur = <const char*>string
- while (cur[0] != 0):
- if (cur[0] == '\n'):
- if (base != cur):
+ while cur[0] != 0:
+ if cur[0] == '\n':
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
tree.xmlOutputBufferWrite(buf, 5, "&#10;")
cur += 1
base = cur
- elif (cur[0] == '\r'):
- if (base != cur):
+ elif cur[0] == '\r':
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
tree.xmlOutputBufferWrite(buf, 5, "&#13;")
cur += 1
base = cur
- elif (cur[0] == '\t'):
- if (base != cur):
+ elif cur[0] == '\t':
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
tree.xmlOutputBufferWrite(buf, 4, "&#9;")
cur += 1
base = cur
- elif (cur[0] == '"'):
- if (base != cur):
+ elif cur[0] == '"':
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
tree.xmlOutputBufferWrite(buf, 6, "&quot;")
cur += 1
base = cur
- elif (cur[0] == '<'):
- if (base != cur):
+ elif cur[0] == '<':
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
tree.xmlOutputBufferWrite(buf, 4, "&lt;")
cur += 1
base = cur
- elif (cur[0] == '>'):
- if (base != cur):
+ elif cur[0] == '>':
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
tree.xmlOutputBufferWrite(buf, 4, "&gt;")
cur += 1
base = cur
- elif (cur[0] == '&'):
- if (base != cur):
+ elif cur[0] == '&':
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
tree.xmlOutputBufferWrite(buf, 5, "&amp;")
@@ -553,23 +553,23 @@ cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
elif (<const unsigned char>cur[0] >= 0x80) and (cur[1] != 0):
- if (base != cur):
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
ucur = <const unsigned char *>cur
- if (ucur[0] < 0xC0):
+ if ucur[0] < 0xC0:
# invalid UTF-8 sequence
val = ucur[0]
l = 1
- elif (ucur[0] < 0xE0):
+ elif ucur[0] < 0xE0:
val = (ucur[0]) & 0x1F
val <<= 6
val |= (ucur[1]) & 0x3F
l = 2
- elif ((ucur[0] < 0xF0) and (ucur[2] != 0)):
+ elif (ucur[0] < 0xF0) and (ucur[2] != 0):
val = (ucur[0]) & 0x0F
val <<= 6
val |= (ucur[1]) & 0x3F
@@ -577,7 +577,7 @@ cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
val |= (ucur[2]) & 0x3F
l = 3
- elif ((ucur[0] < 0xF8) and (ucur[2] != 0) and (ucur[3] != 0)):
+ elif (ucur[0] < 0xF8) and (ucur[2] != 0) and (ucur[3] != 0):
val = (ucur[0]) & 0x07
val <<= 6
val |= (ucur[1]) & 0x3F
@@ -591,7 +591,7 @@ cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
val = ucur[0]
l = 1
- if ((l == 1) or (not tree.xmlIsCharQ(val))):
+ if (l == 1) or (not tree.xmlIsCharQ(val)):
raise ValueError(f"Invalid character: {val:X}")
# We could do multiple things here. Just save
@@ -604,7 +604,7 @@ cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
else:
cur += 1
- if (base != cur):
+ if base != cur:
tree.xmlOutputBufferWrite(buf, cur - base, base)
diff --git a/src/lxml/tests/dummy_http_server.py b/src/lxml/tests/dummy_http_server.py
index b92c5a5f..70ef8d6a 100644
--- a/src/lxml/tests/dummy_http_server.py
+++ b/src/lxml/tests/dummy_http_server.py
@@ -1,5 +1,5 @@
"""
-Simple HTTP request dumper for tests in Python 2.5+.
+Simple HTTP request dumper for tests.
"""
import sys
diff --git a/src/lxml/tests/selftest.py b/src/lxml/tests/selftest.py
index f77b42e2..6ee0ff6d 100644
--- a/src/lxml/tests/selftest.py
+++ b/src/lxml/tests/selftest.py
@@ -823,51 +823,40 @@ def xpath_tokenizer(p):
#
# xinclude tests (samples from appendix C of the xinclude specification)
-XINCLUDE = {}
-
-XINCLUDE["C1.xml"] = """\
+XINCLUDE = {
+ "C1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>120 Mz is adequate for an average home user.</p>
<xi:include href="disclaimer.xml"/>
</document>
-"""
-
-XINCLUDE["disclaimer.xml"] = """\
+""", "disclaimer.xml": """\
<?xml version='1.0'?>
<disclaimer>
<p>The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.</p>
</disclaimer>
-"""
-
-XINCLUDE["C2.xml"] = """\
+""",
+ "C2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>This document has been accessed
<xi:include href="count.txt" parse="text"/> times.</p>
</document>
-"""
-
-XINCLUDE["count.txt"] = "324387"
-
-XINCLUDE["C3.xml"] = """\
+""", "count.txt": "324387", "C3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>The following is the source of the "data.xml" resource:</p>
<example><xi:include href="data.xml" parse="text"/></example>
</document>
-"""
-
-XINCLUDE["data.xml"] = """\
+""", "data.xml": """\
<?xml version='1.0'?>
<data>
<item><![CDATA[Brooks & Shields]]></item>
</data>
-"""
-
-XINCLUDE["C5.xml"] = """\
+""",
+ "C5.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="example.txt" parse="text">
@@ -878,15 +867,15 @@ XINCLUDE["C5.xml"] = """\
</xi:fallback>
</xi:include>
</div>
-"""
-
-XINCLUDE["default.xml"] = """\
+""",
+ "default.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
<p>Example.</p>
<xi:include href="samples/simple.xml"/>
</document>
-"""
+"""}
+
def xinclude_loader(href, parse="xml", encoding=None):
try:
diff --git a/src/lxml/tests/selftest2.py b/src/lxml/tests/selftest2.py
index d1e289ea..80477af5 100644
--- a/src/lxml/tests/selftest2.py
+++ b/src/lxml/tests/selftest2.py
@@ -102,9 +102,9 @@ def check_element(element):
print("no tail member")
check_string(element.tag)
check_mapping(element.attrib)
- if element.text != None:
+ if element.text is not None:
check_string(element.text)
- if element.tail != None:
+ if element.tail is not None:
check_string(element.tail)
def check_element_tree(tree):
diff --git a/src/lxml/tests/test_doctestcompare.py b/src/lxml/tests/test_doctestcompare.py
index 44179d91..1d9625fc 100644
--- a/src/lxml/tests/test_doctestcompare.py
+++ b/src/lxml/tests/test_doctestcompare.py
@@ -1,4 +1,3 @@
-import sys
import unittest
from lxml import etree
@@ -123,8 +122,7 @@ class DoctestCompareTest(HelperTestCase):
def test_suite():
suite = unittest.TestSuite()
- if sys.version_info >= (2,4):
- suite.addTests([unittest.makeSuite(DoctestCompareTest)])
+ suite.addTests([unittest.makeSuite(DoctestCompareTest)])
return suite
diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py
index 2d31cc01..0b82a574 100644
--- a/src/lxml/tests/test_elementtree.py
+++ b/src/lxml/tests/test_elementtree.py
@@ -20,7 +20,7 @@ from common_imports import ElementTree, cElementTree, ET_VERSION, CET_VERSION
from common_imports import filter_by_version, fileInTestDir, canonicalize, HelperTestCase
from common_imports import _str, _bytes, unicode, next
-if cElementTree is not None and (CET_VERSION <= (1,0,7) or sys.version_info >= (3,3)):
+if cElementTree is not None and (CET_VERSION <= (1,0,7) or sys.version_info[0] >= 3):
cElementTree = None
if ElementTree is not None:
@@ -3929,9 +3929,9 @@ class _ETreeTestCaseBase(HelperTestCase):
self.assertTrue(hasattr(element, 'tail'))
self._check_string(element.tag)
self._check_mapping(element.attrib)
- if element.text != None:
+ if element.text is not None:
self._check_string(element.text)
- if element.tail != None:
+ if element.tail is not None:
self._check_string(element.tail)
def _check_string(self, string):
@@ -4101,7 +4101,7 @@ class _XMLPullParserTest(unittest.TestCase):
def test_events_sequence(self):
# Test that events can be some sequence that's not just a tuple or list
- eventset = set(['end', 'start'])
+ eventset = {'end', 'start'}
parser = self.etree.XMLPullParser(events=eventset)
self._feed(parser, "<foo>bar</foo>")
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 89f77eba..bfb438e2 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -697,7 +697,7 @@ class ETreeOnlyTestCase(HelperTestCase):
def name(event, el):
if event == 'pi':
- return (el.target, el.text)
+ return el.target, el.text
else:
return el.tag
@@ -1503,42 +1503,41 @@ class ETreeOnlyTestCase(HelperTestCase):
xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
- if etree.LIBXML_VERSION > (2,6,20):
- def test_entity_parse(self):
- parse = self.etree.parse
- tostring = self.etree.tostring
- parser = self.etree.XMLParser(resolve_entities=False)
- Entity = self.etree.Entity
-
- xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
- tree = parse(BytesIO(xml), parser)
- root = tree.getroot()
- self.assertEqual(root[0].tag, Entity)
- self.assertEqual(root[0].text, "&myentity;")
- self.assertEqual(root[0].tail, None)
- self.assertEqual(root[0].name, "myentity")
-
- self.assertEqual(_bytes('<doc>&myentity;</doc>'),
- tostring(root))
-
- def test_entity_restructure(self):
- xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
- <root>
- <child1/>
- <child2/>
- <child3>&nbsp;</child3>
- </root>''')
-
- parser = self.etree.XMLParser(resolve_entities=False)
- root = etree.fromstring(xml, parser)
- self.assertEqual([ el.tag for el in root ],
- ['child1', 'child2', 'child3'])
-
- root[0] = root[-1]
- self.assertEqual([ el.tag for el in root ],
- ['child3', 'child2'])
- self.assertEqual(root[0][0].text, '&nbsp;')
- self.assertEqual(root[0][0].name, 'nbsp')
+ def test_entity_parse(self):
+ parse = self.etree.parse
+ tostring = self.etree.tostring
+ parser = self.etree.XMLParser(resolve_entities=False)
+ Entity = self.etree.Entity
+
+ xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
+ tree = parse(BytesIO(xml), parser)
+ root = tree.getroot()
+ self.assertEqual(root[0].tag, Entity)
+ self.assertEqual(root[0].text, "&myentity;")
+ self.assertEqual(root[0].tail, None)
+ self.assertEqual(root[0].name, "myentity")
+
+ self.assertEqual(_bytes('<doc>&myentity;</doc>'),
+ tostring(root))
+
+ def test_entity_restructure(self):
+ xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
+ <root>
+ <child1/>
+ <child2/>
+ <child3>&nbsp;</child3>
+ </root>''')
+
+ parser = self.etree.XMLParser(resolve_entities=False)
+ root = etree.fromstring(xml, parser)
+ self.assertEqual([ el.tag for el in root ],
+ ['child1', 'child2', 'child3'])
+
+ root[0] = root[-1]
+ self.assertEqual([ el.tag for el in root ],
+ ['child3', 'child2'])
+ self.assertEqual(root[0][0].text, '&nbsp;')
+ self.assertEqual(root[0][0].name, 'nbsp')
def test_entity_append(self):
Entity = self.etree.Entity
@@ -4613,10 +4612,8 @@ def test_suite():
suite.addTests(doctest.DocTestSuite(etree))
suite.addTests(
[make_doctest('../../../doc/tutorial.txt')])
- if sys.version_info >= (2,6):
- # now requires the 'with' statement
- suite.addTests(
- [make_doctest('../../../doc/api.txt')])
+ suite.addTests(
+ [make_doctest('../../../doc/api.txt')])
suite.addTests(
[make_doctest('../../../doc/FAQ.txt')])
suite.addTests(
diff --git a/src/lxml/tests/test_external_document.py b/src/lxml/tests/test_external_document.py
index d28328a3..82ba4228 100644
--- a/src/lxml/tests/test_external_document.py
+++ b/src/lxml/tests/test_external_document.py
@@ -5,17 +5,14 @@ Test cases related to direct loading of external libxml2 documents
from __future__ import absolute_import
-import sys
import unittest
-from .common_imports import HelperTestCase, etree, skipIf
+from .common_imports import HelperTestCase, etree
DOC_NAME = b'libxml2:xmlDoc'
DESTRUCTOR_NAME = b'destructor:xmlFreeDoc'
-@skipIf(sys.version_info[:2] < (2, 7),
- 'Not supported for python < 2.7')
class ExternalDocumentTestCase(HelperTestCase):
def setUp(self):
import ctypes
diff --git a/src/lxml/tests/test_http_io.py b/src/lxml/tests/test_http_io.py
index 2e62626e..d058fad2 100644
--- a/src/lxml/tests/test_http_io.py
+++ b/src/lxml/tests/test_http_io.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
-Web IO test cases that need Python 2.5+ (wsgiref)
+Web IO test cases (wsgiref)
"""
from __future__ import with_statement
diff --git a/src/lxml/tests/test_incremental_xmlfile.py b/src/lxml/tests/test_incremental_xmlfile.py
index 4fc8efef..ac394d6d 100644
--- a/src/lxml/tests/test_incremental_xmlfile.py
+++ b/src/lxml/tests/test_incremental_xmlfile.py
@@ -82,7 +82,7 @@ class _XmlFileTestCaseBase(HelperTestCase):
tree = self._parse_file()
self.assertTrue(tree is not None)
self.assertEqual(100, len(tree.getroot()))
- self.assertEqual(set(['test']), set(el.tag for el in tree.getroot()))
+ self.assertEqual({'test'}, {el.tag for el in tree.getroot()})
def test_namespace_nsmap(self):
with etree.xmlfile(self._file) as xf:
@@ -440,11 +440,9 @@ class HtmlFileTestCase(_XmlFileTestCaseBase):
def test_void_elements(self):
# http://www.w3.org/TR/html5/syntax.html#elements-0
- void_elements = set([
- "area", "base", "br", "col", "embed", "hr", "img",
- "input", "keygen", "link", "meta", "param",
- "source", "track", "wbr"
- ])
+ void_elements = {
+ "area", "base", "br", "col", "embed", "hr", "img", "input",
+ "keygen", "link", "meta", "param", "source", "track", "wbr"}
# FIXME: These don't get serialized as void elements.
void_elements.difference_update([
diff --git a/src/lxml/tests/test_io.py b/src/lxml/tests/test_io.py
index 06199875..33e59010 100644
--- a/src/lxml/tests/test_io.py
+++ b/src/lxml/tests/test_io.py
@@ -15,17 +15,6 @@ from common_imports import etree, ElementTree, _str, _bytes
from common_imports import SillyFileLike, LargeFileLike, HelperTestCase
from common_imports import read_file, write_to_file, BytesIO
-if sys.version_info < (2,6):
- class NamedTemporaryFile(object):
- def __init__(self, delete=True, **kwargs):
- self._tmpfile = tempfile.NamedTemporaryFile(**kwargs)
- def close(self):
- self._tmpfile.flush()
- def __getattr__(self, name):
- return getattr(self._tmpfile, name)
-else:
- NamedTemporaryFile = tempfile.NamedTemporaryFile
-
class _IOTestCaseBase(HelperTestCase):
"""(c)ElementTree compatibility for IO functions/methods
@@ -285,7 +274,7 @@ class _IOTestCaseBase(HelperTestCase):
bom = _bytes('\\xEF\\xBB\\xBF').decode(
"unicode_escape").encode("latin1")
self.assertEqual(3, len(bom))
- f = NamedTemporaryFile(delete=False)
+ f = tempfile.NamedTemporaryFile(delete=False)
try:
try:
f.write(bom)
@@ -303,7 +292,7 @@ class _IOTestCaseBase(HelperTestCase):
bom = _bytes('\\xEF\\xBB\\xBF').decode(
"unicode_escape").encode("latin1")
self.assertEqual(3, len(bom))
- f = NamedTemporaryFile(delete=False)
+ f = tempfile.NamedTemporaryFile(delete=False)
try:
try:
f.write(bom)
@@ -326,7 +315,7 @@ class _IOTestCaseBase(HelperTestCase):
xml = uxml.encode("utf-16")
self.assertTrue(xml[:2] in boms, repr(xml[:2]))
- f = NamedTemporaryFile(delete=False)
+ f = tempfile.NamedTemporaryFile(delete=False)
try:
try:
f.write(xml)
diff --git a/src/lxml/tests/test_isoschematron.py b/src/lxml/tests/test_isoschematron.py
index 1d2e948b..01c600c5 100644
--- a/src/lxml/tests/test_isoschematron.py
+++ b/src/lxml/tests/test_isoschematron.py
@@ -268,16 +268,14 @@ class ETreeISOSchematronTestCase(HelperTestCase):
self.assertTrue(not valid)
self.assertTrue(
isinstance(schematron.validation_report, etree._ElementTree),
- 'expected a validation report result tree, got: %s' %
- (schematron.validation_report))
+ 'expected a validation report result tree, got: %s' % schematron.validation_report)
schematron = isoschematron.Schematron(schema, store_report=False)
self.assertTrue(schematron(tree_valid), schematron.error_log)
valid = schematron(tree_invalid)
self.assertTrue(not valid)
self.assertTrue(schematron.validation_report is None,
- 'validation reporting switched off, still: %s' %
- (schematron.validation_report))
+ 'validation reporting switched off, still: %s' % schematron.validation_report)
def test_schematron_store_schematron(self):
schema = self.parse('''\
diff --git a/src/lxml/tests/test_objectify.py b/src/lxml/tests/test_objectify.py
index 68b9d7a8..86bdae89 100644
--- a/src/lxml/tests/test_objectify.py
+++ b/src/lxml/tests/test_objectify.py
@@ -462,7 +462,7 @@ class ObjectifyTestCase(HelperTestCase):
self.assertEqual([root.c1],
list(iter(root.c1)))
self.assertEqual([root.c1.c2[0], root.c1.c2[1], root.c1.c2[2]],
- list(iter((root.c1.c2))))
+ list(iter(root.c1.c2)))
def test_class_lookup(self):
root = self.XML(xml_str)
@@ -2621,9 +2621,7 @@ def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ObjectifyTestCase)])
suite.addTests(doctest.DocTestSuite(objectify))
- if sys.version_info >= (2,4):
- suite.addTests(
- [make_doctest('../../../doc/objectify.txt')])
+ suite.addTests([make_doctest('../../../doc/objectify.txt')])
return suite
if __name__ == '__main__':
diff --git a/src/lxml/tests/test_pyclasslookup.py b/src/lxml/tests/test_pyclasslookup.py
index cb4eb5dc..9d164190 100644
--- a/src/lxml/tests/test_pyclasslookup.py
+++ b/src/lxml/tests/test_pyclasslookup.py
@@ -5,7 +5,7 @@ Tests specific to the Python based class lookup.
"""
-import unittest, operator, os.path, sys
+import unittest, os.path, sys
this_dir = os.path.dirname(__file__)
if this_dir not in sys.path:
diff --git a/src/lxml/tests/test_sax.py b/src/lxml/tests/test_sax.py
index 5b1b3089..adc5e736 100644
--- a/src/lxml/tests/test_sax.py
+++ b/src/lxml/tests/test_sax.py
@@ -13,6 +13,7 @@ if this_dir not in sys.path:
from common_imports import HelperTestCase, make_doctest, BytesIO, _bytes
from lxml import sax
from xml.dom import pulldom
+from xml.sax.handler import ContentHandler
class ETreeSaxTestCase(HelperTestCase):
@@ -87,6 +88,8 @@ class ETreeSaxTestCase(HelperTestCase):
dom.firstChild.localName)
self.assertEqual('blaA',
dom.firstChild.namespaceURI)
+ self.assertEqual(None,
+ dom.firstChild.prefix)
children = dom.firstChild.childNodes
self.assertEqual('ab',
@@ -96,6 +99,33 @@ class ETreeSaxTestCase(HelperTestCase):
self.assertEqual('ba',
children[2].nodeValue)
+ def test_sax_to_pulldom_multiple_namespaces(self):
+ tree = self.parse('<a xmlns="blaA" xmlns:a="blaA"></a>')
+ handler = pulldom.SAX2DOM()
+ sax.saxify(tree, handler)
+ dom = handler.document
+
+ # With multiple prefix definitions, the node should keep the one
+ # that was actually used, even if the others also are valid.
+ self.assertEqual('a',
+ dom.firstChild.localName)
+ self.assertEqual('blaA',
+ dom.firstChild.namespaceURI)
+ self.assertEqual(None,
+ dom.firstChild.prefix)
+
+ tree = self.parse('<a:a xmlns="blaA" xmlns:a="blaA"></a:a>')
+ handler = pulldom.SAX2DOM()
+ sax.saxify(tree, handler)
+ dom = handler.document
+
+ self.assertEqual('a',
+ dom.firstChild.localName)
+ self.assertEqual('blaA',
+ dom.firstChild.namespaceURI)
+ self.assertEqual('a',
+ dom.firstChild.prefix)
+
def test_element_sax(self):
tree = self.parse('<a><b/></a>')
a = tree.getroot()
@@ -267,9 +297,118 @@ class ETreeSaxTestCase(HelperTestCase):
return f.getvalue().replace(_bytes('\n'), _bytes(''))
+class SimpleContentHandler(ContentHandler, object):
+ """A SAX content handler that just stores the events"""
+
+ def __init__(self):
+ self.sax_events = []
+ super(SimpleContentHandler, self).__init__()
+
+ def startDocument(self):
+ self.sax_events.append(('startDocument',))
+
+ def endDocument(self):
+ self.sax_events.append(('endDocument',))
+
+ def startPrefixMapping(self, prefix, uri):
+ self.sax_events.append(('startPrefixMapping', prefix, uri))
+
+ def endPrefixMapping(self, prefix):
+ self.sax_events.append(('endPrefixMapping', prefix))
+
+ def startElement(self, name, attrs):
+ self.sax_events.append(('startElement', name, dict(attrs)))
+
+ def endElement(self, name):
+ self.sax_events.append(('endElement', name))
+
+ def startElementNS(self, name, qname, attrs):
+ self.sax_events.append(('startElementNS', name, qname, attrs._qnames))
+
+ def endElementNS(self, name, qname):
+ self.sax_events.append(('endElementNS', name, qname))
+
+ def characters(self, content):
+ self.sax_events.append(('characters', content))
+
+ def ignorableWhitespace(self, whitespace):
+ self.sax_events.append(('ignorableWhitespace', whitespace))
+
+ def processingInstruction(self, target, data):
+ self.sax_events.append(('processingInstruction', target, data))
+
+ def skippedEntity(self, name):
+ self.sax_events.append(('skippedEntity', name))
+
+
+class NSPrefixSaxTestCase(HelperTestCase):
+ """Testing that namespaces generate the right SAX events"""
+
+ def _saxify(self, tree):
+ handler = SimpleContentHandler()
+ sax.ElementTreeProducer(tree, handler).saxify()
+ return handler.sax_events
+
+ def test_element_sax_ns_prefix(self):
+ # The name of the prefix should be preserved, if the uri is unique
+ tree = self.parse('<a:a xmlns:a="blaA" xmlns:c="blaC">'
+ '<d a:attr="value" c:attr="value" /></a:a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ [('startElementNS', ('blaA', 'a'), 'a:a', {}),
+ ('startElementNS', (None, 'd'), 'd',
+ {('blaA', 'attr'): 'a:attr', ('blaC', 'attr'): 'c:attr'}),
+ ('endElementNS', (None, 'd'), 'd'),
+ ('endElementNS', ('blaA', 'a'), 'a:a'),
+ ],
+ self._saxify(a)[3:7])
+
+ def test_element_sax_default_ns_prefix(self):
+ # Default prefixes should also not get a generated prefix
+ tree = self.parse('<a xmlns="blaA"><b attr="value" /></a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ [('startDocument',),
+ # NS prefix should be None:
+ ('startPrefixMapping', None, 'blaA'),
+ ('startElementNS', ('blaA', 'a'), 'a', {}),
+ # Attribute prefix should be None:
+ ('startElementNS', ('blaA', 'b'), 'b', {(None, 'attr'): 'attr'}),
+ ('endElementNS', ('blaA', 'b'), 'b'),
+ ('endElementNS', ('blaA', 'a'), 'a'),
+ # Prefix should be None again:
+ ('endPrefixMapping', None),
+ ('endDocument',)],
+ self._saxify(a))
+
+ # Except for attributes, if there is both a default namespace
+ # and a named namespace with the same uri
+ tree = self.parse('<a xmlns="bla" xmlns:a="bla">'
+ '<b a:attr="value" /></a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ ('startElementNS', ('bla', 'b'), 'b', {('bla', 'attr'): 'a:attr'}),
+ self._saxify(a)[4])
+
+ def test_element_sax_twin_ns_prefix(self):
+ # Make an element with an doubly registered uri
+ tree = self.parse('<a xmlns:b="bla" xmlns:c="bla">'
+ '<d c:attr="attr" /></a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ # It should get the b prefix in this case
+ ('startElementNS', (None, 'd'), 'd', {('bla', 'attr'): 'b:attr'}),
+ self._saxify(a)[4])
+
+
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ETreeSaxTestCase)])
+ suite.addTests([unittest.makeSuite(NSPrefixSaxTestCase)])
suite.addTests(
[make_doctest('../../../doc/sax.txt')])
return suite
diff --git a/src/lxml/tests/test_threading.py b/src/lxml/tests/test_threading.py
index 8948c3ec..66e164b2 100644
--- a/src/lxml/tests/test_threading.py
+++ b/src/lxml/tests/test_threading.py
@@ -130,7 +130,7 @@ class ThreadingTestCase(HelperTestCase):
<xsl:template match="tag" />
<!-- extend time for parsing + transform -->
''' + '\n'.join('<xsl:template match="tag%x" />' % i for i in range(200)) + '''
- <xsl:foo />
+ <xsl:UnExpectedElement />
</xsl:stylesheet>''')
self.assertRaises(etree.XSLTParseError,
etree.XSLT, style)
@@ -153,9 +153,10 @@ class ThreadingTestCase(HelperTestCase):
self.assertTrue(len(log))
if last_log is not None:
self.assertEqual(len(last_log), len(log))
- self.assertEqual(4, len(log))
+ self.assertTrue(len(log) >= 2, len(log))
for error in log:
- self.assertTrue(':ERROR:XSLT:' in str(error))
+ self.assertTrue(':ERROR:XSLT:' in str(error), str(error))
+ self.assertTrue(any('UnExpectedElement' in str(error) for error in log), log)
last_log = log
def test_thread_xslt_apply_error_log(self):
@@ -513,7 +514,7 @@ class ThreadPipelineTestCase(HelperTestCase):
last = worker_class(last.out_queue, item_count, **kwargs)
last.setDaemon(True)
last.start()
- return (in_queue, start, last)
+ return in_queue, start, last
def test_thread_pipeline_thread_parse(self):
item_count = self.item_count
diff --git a/src/lxml/xmlid.pxi b/src/lxml/xmlid.pxi
index b5b5c64a..c1f2bbf1 100644
--- a/src/lxml/xmlid.pxi
+++ b/src/lxml/xmlid.pxi
@@ -19,7 +19,7 @@ def XMLID(text, parser=None, *, base_url=None):
dic = {}
for elem in _find_id_attributes(root):
dic[elem.get(u'id')] = elem
- return (root, dic)
+ return root, dic
def XMLDTDID(text, parser=None, *, base_url=None):
u"""XMLDTDID(text, parser=None, base_url=None)
@@ -37,9 +37,9 @@ def XMLDTDID(text, parser=None, *, base_url=None):
root = XML(text, parser, base_url=base_url)
# xml:id spec compatible implementation: use DTD ID attributes from libxml2
if root._doc._c_doc.ids is NULL:
- return (root, {})
+ return root, {}
else:
- return (root, _IDDict(root))
+ return root, _IDDict(root)
def parseid(source, parser=None, *, base_url=None):
u"""parseid(source, parser=None)
@@ -53,7 +53,7 @@ def parseid(source, parser=None, *, base_url=None):
"""
cdef _Document doc
doc = _parseDocument(source, parser, base_url)
- return (_elementTreeFactory(doc, None), _IDDict(doc))
+ return _elementTreeFactory(doc, None), _IDDict(doc)
cdef class _IDDict:
u"""IDDict(self, etree)
diff --git a/src/lxml/xpath.pxi b/src/lxml/xpath.pxi
index 6c446737..784987d4 100644
--- a/src/lxml/xpath.pxi
+++ b/src/lxml/xpath.pxi
@@ -101,7 +101,7 @@ cdef class _XPathContext(_BaseContext):
cdef void _registerExsltFunctionsForNamespaces(
- void* _c_href, void* _ctxt, xmlChar* c_prefix):
+ void* _c_href, void* _ctxt, const_xmlChar* c_prefix):
c_href = <const_xmlChar*> _c_href
ctxt = <xpath.xmlXPathContext*> _ctxt
diff --git a/test.py b/test.py
index 23c7dd72..dd05cf8d 100644
--- a/test.py
+++ b/test.py
@@ -455,8 +455,8 @@ def main(argv):
"""Main program."""
# Environment
- if sys.version_info < (2, 6):
- stderr('%s: need Python 2.6 or later' % argv[0])
+ if sys.version_info < (2, 7):
+ stderr('%s: need Python 2.7 or later' % argv[0])
stderr('your python is %s' % sys.version)
return 1
diff --git a/tools/manylinux/build-wheels.sh b/tools/manylinux/build-wheels.sh
index 24612f47..3b13616f 100755
--- a/tools/manylinux/build-wheels.sh
+++ b/tools/manylinux/build-wheels.sh
@@ -36,8 +36,6 @@ run_tests() {
prepare_system() {
#yum install -y zlib-devel
- # Remove Python 2.6 symlinks
- rm -f /opt/python/cp26*
echo "Python versions found: $(cd /opt/python && echo cp* | sed -e 's|[^ ]*-||g')"
}
diff --git a/tox.ini b/tox.ini
index b03a589b..d1a71a91 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
# and then run "tox" from this directory.
[tox]
-envlist = py26, py27, py32, py33, py34
+envlist = py27, py34, py35, py36, py37
[testenv]
setenv =
diff --git a/version.txt b/version.txt
index d6f85abf..c7d79363 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-4.2.6
+4.3.0a0