summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Burak Arslan <burak.arslan@arskom.com.tr> 2017-02-06 13:40:24 +0300
committer Burak Arslan <burak.arslan@arskom.com.tr> 2017-02-06 13:40:24 +0300
commit 83470a34934a231471353df42b5e136509c8e40d (patch)
tree 50525fafa1f38d6bc1ca552207b739161bd20cd5
parent 0bfc0d9903258fcc9311652d26a22638eaa5ee7e (diff)
parent d64c789ab2bbdf8ccd63fa819e5c23597d4b7bd0 (diff)
download python-lxml-83470a34934a231471353df42b5e136509c8e40d.tar.gz
Merge remote-tracking branch 'lxml/master' into method-ctxmanager
# Conflicts:
#	src/lxml/serializer.pxi
-rw-r--r-- .appveyor.yml 2
-rw-r--r-- .gitignore 2
-rw-r--r-- .hgignore 3
-rw-r--r-- .travis.yml 2
-rw-r--r-- CHANGES.txt 41
-rw-r--r-- Makefile 7
-rw-r--r-- buildlibxml.py 7
-rw-r--r-- doc/main.txt 54
-rw-r--r-- src/lxml/includes/tree.pxd 3
-rw-r--r-- src/lxml/lxml.etree.pyx 23
-rw-r--r-- src/lxml/serializer.pxi 217
-rw-r--r-- src/lxml/tests/common_imports.py 7
-rw-r--r-- src/lxml/tests/test_etree.py 100
-rw-r--r-- src/lxml/tests/test_incremental_xmlfile.py 21
-rwxr-xr-x tools/manylinux/build-wheels.sh 4
-rw-r--r-- version.txt 2
16 files changed, 374 insertions, 121 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 2f6a2f09..b0127f6d 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -5,6 +5,8 @@ environment:
- python: 27
- python: 35
- python: 35-x64
+ - python: 36
+ - python: 36-x64
install:
- cmd: >-
diff --git a/.gitignore b/.gitignore
index c708f20c..ea137ead 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
.idea
build
dist
+wheelhouse
doc/html
libs
*.egg-info
@@ -10,6 +11,7 @@ libs
*.so
*.o
*.pyd
+MANIFEST
src/lxml/includes/lxml-version.h
src/lxml/*.html
diff --git a/.hgignore b/.hgignore
index 6503fe77..c30692ae 100644
--- a/.hgignore
+++ b/.hgignore
@@ -13,6 +13,7 @@ src/lxml/lxml.objectify.c
build/
dist/
+wheelhouse/
doc/html/
cython_debug/
.idea/
@@ -20,6 +21,7 @@ cython_debug/
.gitrev
.coverage
funding.txt
+MANIFEST
.tox
*.orig
*.rej
@@ -30,3 +32,4 @@ funding.txt
*.o
*.pyd
*~
+*.egg-info
diff --git a/.travis.yml b/.travis.yml
index 89a61329..442adf19 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,10 +3,10 @@ language: python
python:
- 2.6
- 2.7
- - 3.2
- 3.3
- 3.4
- 3.5
+ - 3.6
- pypy
- pypy3
diff --git a/CHANGES.txt b/CHANGES.txt
index 0c9d59fd..155c1441 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,47 @@
lxml changelog
==============
+3.8.0 (2017-??-??)
+==================
+
+Features added
+--------------
+
+* ``ElementTree.write()`` has a new option ``doctype`` that writes out a
+ doctype string before the serialisation, in the same way as ``tostring()``.
+
+Other changes
+-------------
+
+* The previously undocumented ``docstring`` option in ``ElementTree.write()``
+ produces a deprecation warning and will eventually be removed.
+
+
+3.7.3 (2017-??-??)
+==================
+
+Bugs fixed
+----------
+
+* GH#218 was ineffective in Python 3.
+
+
+3.7.2 (2017-01-08)
+==================
+
+Bugs fixed
+----------
+
+* Work around installation problems in recent Python 2.7 versions
+ due to FTP download failures.
+
+* GH#219: ``xmlfile.element()`` was not properly quoting attribute values.
+ Patch by Burak Arslan.
+
+* GH#218: ``xmlfile.element()`` was not properly escaping text content of
+ script/style tags. Patch by Burak Arslan.
+
+
3.7.1 (2016-12-23)
==================
diff --git a/Makefile b/Makefile
index e19906a7..dce52d96 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ CYTHON3_WITH_COVERAGE=$(shell $(PYTHON3) -c 'import Cython.Coverage; import sys;
MANYLINUX_LIBXML2_VERSION=2.9.3
MANYLINUX_LIBXSLT_VERSION=1.1.29
MANYLINUX_IMAGE_X86_64=quay.io/pypa/manylinux1_x86_64
+MANYLINUX_IMAGE_686=quay.io/pypa/manylinux1_i686
.PHONY: all inplace rebuild-sdist sdist build require-cython wheel_manylinux wheel
@@ -39,14 +40,16 @@ require-cython:
@[ -n "$(PYTHON_WITH_CYTHON)" ] || { \
echo "NOTE: missing Cython - please use this command to install it: $(PYTHON) -m pip install Cython"; false; }
-wheel_manylinux: dist/lxml-$(LXMLVERSION).tar.gz
+wheel_manylinux: wheel_manylinux64 # wheel_manylinux32
+
+wheel_manylinux32 wheel_manylinux64: dist/lxml-$(LXMLVERSION).tar.gz
time docker run --rm -t \
-v $(shell pwd):/io \
-e CFLAGS="-O3 -mtune=generic -pipe -fPIC" \
-e LDFLAGS="$(LDFLAGS)" \
-e LIBXML2_VERSION="$(MANYLINUX_LIBXML2_VERSION)" \
-e LIBXSLT_VERSION="$(MANYLINUX_LIBXSLT_VERSION)" \
- $(MANYLINUX_IMAGE_X86_64) \
+ $(if $(patsubst %32,,$@),$(MANYLINUX_IMAGE_X86_64),$(MANYLINUX_IMAGE_686)) \
bash /io/tools/manylinux/build-wheels.sh /io/$<
wheel:
diff --git a/buildlibxml.py b/buildlibxml.py
index 88567eaa..bbdf62b0 100644
--- a/buildlibxml.py
+++ b/buildlibxml.py
@@ -1,14 +1,14 @@
import os, re, sys, subprocess
import tarfile
-from distutils import log, sysconfig, version
+from distutils import log, version
from contextlib import closing
try:
from urlparse import urlsplit, urljoin, unquote
- from urllib import urlretrieve, urlopen
+ from urllib import urlretrieve, urlopen, urlcleanup
except ImportError:
from urllib.parse import urlsplit, urljoin, unquote
- from urllib.request import urlretrieve, urlopen
+ from urllib.request import urlretrieve, urlopen, urlcleanup
multi_make_options = []
try:
@@ -253,6 +253,7 @@ def download_library(dest_dir, location, name, version_re, filename, version=Non
% (name, dest_filename))
else:
print('Downloading %s into %s' % (name, dest_filename))
+ urlcleanup() # work around FTP bug 27973 in Py2.7.12+
urlretrieve(full_url, dest_filename)
return dest_filename
diff --git a/doc/main.txt b/doc/main.txt
index f19d935e..c9ca624b 100644
--- a/doc/main.txt
+++ b/doc/main.txt
@@ -35,7 +35,7 @@ libxml2_ and libxslt_. It is unique in that it combines the speed and
XML feature completeness of these libraries with the simplicity of a
native Python API, mostly compatible but superior to the well-known
ElementTree_ API. The latest release works with all CPython versions
-from 2.6 to 3.5. See the introduction_ for more information about
+from 2.6 to 3.6. See the introduction_ for more information about
background and goals of the lxml project. Some common questions are
answered in the FAQ_.
@@ -157,8 +157,8 @@ Index <http://pypi.python.org/pypi/lxml/>`_ (PyPI). It has the source
that compiles on various platforms. The source distribution is signed
with `this key <pubkey.asc>`_.
-The latest version is `lxml 3.7.1`_, released 2016-12-23
-(`changes for 3.7.1`_). `Older versions <#old-versions>`_
+The latest version is `lxml 3.7.2`_, released 2017-01-08
+(`changes for 3.7.2`_). `Older versions <#old-versions>`_
are listed below.
Please take a look at the
@@ -229,7 +229,7 @@ obstacle to using lxml in your codebase.
Old Versions
------------
-See the web sites of lxml
+See the websites of lxml
`1.3 <http://lxml.de/1.3/>`_,
`2.0 <http://lxml.de/2.0/>`_,
`2.1 <http://lxml.de/2.1/>`_,
@@ -238,13 +238,17 @@ See the web sites of lxml
`3.0 <http://lxml.de/3.0/>`_,
`3.1 <http://lxml.de/3.1/>`_,
`3.2 <http://lxml.de/3.2/>`_,
-`3.3 <http://lxml.de/3.3/>`_
-`3.4 <http://lxml.de/3.4/>`_
-`3.5 <http://lxml.de/3.5/>`_
+`3.3 <http://lxml.de/3.3/>`_,
+`3.4 <http://lxml.de/3.4/>`_,
+`3.5 <http://lxml.de/3.5/>`_,
`3.6 <http://lxml.de/3.6/>`_
-and the `latest in-development version <http://lxml.de/dev/>`_.
-.. _`PDF documentation`: lxmldoc-3.7.0.pdf
+..
+ and the `latest in-development version <http://lxml.de/dev/>`_.
+
+.. _`PDF documentation`: lxmldoc-3.7.2.pdf
+
+* `lxml 3.7.2`_, released 2017-01-08 (`changes for 3.7.2`_)
* `lxml 3.7.1`_, released 2016-12-22 (`changes for 3.7.1`_)
@@ -260,22 +264,9 @@ and the `latest in-development version <http://lxml.de/dev/>`_.
* `lxml 3.6.0`_, released 2016-03-17 (`changes for 3.6.0`_)
-* `lxml 3.5.0`_, released 2015-11-13 (`changes for 3.5.0`_)
-
-* `lxml 3.5.0b1`_, released 2015-09-18 (`changes for 3.5.0b1`_)
-
-* `lxml 3.4.4`_, released 2015-04-25 (`changes for 3.4.4`_)
-
-* `lxml 3.4.3`_, released 2015-04-15 (`changes for 3.4.3`_)
-
-* `lxml 3.4.2`_, released 2015-02-07 (`changes for 3.4.2`_)
-
-* `lxml 3.4.1`_, released 2014-11-20 (`changes for 3.4.1`_)
-
-* `lxml 3.4.0`_, released 2014-09-10 (`changes for 3.4.0`_)
-
-* `older releases <http://lxml.de/3.4/#old-versions>`_
+* `older releases <http://lxml.de/3.6/#old-versions>`_
+.. _`lxml 3.7.2`: /files/lxml-3.7.2.tgz
.. _`lxml 3.7.1`: /files/lxml-3.7.1.tgz
.. _`lxml 3.7.0`: /files/lxml-3.7.0.tgz
.. _`lxml 3.6.4`: /files/lxml-3.6.4.tgz
@@ -283,14 +274,8 @@ and the `latest in-development version <http://lxml.de/dev/>`_.
.. _`lxml 3.6.2`: /files/lxml-3.6.2.tgz
.. _`lxml 3.6.1`: /files/lxml-3.6.1.tgz
.. _`lxml 3.6.0`: /files/lxml-3.6.0.tgz
-.. _`lxml 3.5.0`: /files/lxml-3.5.0.tgz
-.. _`lxml 3.5.0b1`: /files/lxml-3.5.0b1.tgz
-.. _`lxml 3.4.4`: /files/lxml-3.4.4.tgz
-.. _`lxml 3.4.3`: /files/lxml-3.4.3.tgz
-.. _`lxml 3.4.2`: /files/lxml-3.4.2.tgz
-.. _`lxml 3.4.1`: /files/lxml-3.4.1.tgz
-.. _`lxml 3.4.0`: /files/lxml-3.4.0.tgz
+.. _`changes for 3.7.2`: /changes-3.7.2.html
.. _`changes for 3.7.1`: /changes-3.7.1.html
.. _`changes for 3.7.0`: /changes-3.7.0.html
.. _`changes for 3.6.4`: /changes-3.6.4.html
@@ -298,10 +283,3 @@ and the `latest in-development version <http://lxml.de/dev/>`_.
.. _`changes for 3.6.2`: /changes-3.6.2.html
.. _`changes for 3.6.1`: /changes-3.6.1.html
.. _`changes for 3.6.0`: /changes-3.6.0.html
-.. _`changes for 3.5.0`: /changes-3.5.0.html
-.. _`changes for 3.5.0b1`: /changes-3.5.0b1.html
-.. _`changes for 3.4.4`: /changes-3.4.4.html
-.. _`changes for 3.4.3`: /changes-3.4.3.html
-.. _`changes for 3.4.2`: /changes-3.4.2.html
-.. _`changes for 3.4.1`: /changes-3.4.1.html
-.. _`changes for 3.4.0`: /changes-3.4.0.html
diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd
index b3bab52f..0d9d8843 100644
--- a/src/lxml/includes/tree.pxd
+++ b/src/lxml/includes/tree.pxd
@@ -61,6 +61,7 @@ cdef extern from "libxml/encoding.h":
cdef extern from "libxml/chvalid.h":
cdef int xmlIsChar_ch(char c) nogil
+ cdef int xmlIsCharQ(int ch) nogil
cdef extern from "libxml/hash.h":
ctypedef struct xmlHashTable
@@ -357,6 +358,8 @@ cdef extern from "libxml/tree.h":
cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf,
xmlDoc* doc, xmlNode* cur, int level,
int format, const_char* encoding) nogil
+ cdef void xmlBufAttrSerializeTxtContent(xmlOutputBuffer *buf, xmlDoc *doc,
+ xmlAttr *attr, const_xmlChar *string) nogil
cdef void xmlNodeSetName(xmlNode* cur, const_xmlChar* name) nogil
cdef void xmlNodeSetContent(xmlNode* cur, const_xmlChar* content) nogil
cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd) nogil
diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx
index 67ff69b2..c336cef2 100644
--- a/src/lxml/lxml.etree.pyx
+++ b/src/lxml/lxml.etree.pyx
@@ -1954,11 +1954,12 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
def write(self, file, *, encoding=None, method=u"xml",
pretty_print=False, xml_declaration=None, with_tail=True,
- standalone=None, docstring=None, compression=0,
- exclusive=False, with_comments=True, inclusive_ns_prefixes=None):
+ standalone=None, doctype=None, compression=0,
+ exclusive=False, with_comments=True, inclusive_ns_prefixes=None,
+ docstring=None):
u"""write(self, file, encoding=None, method="xml",
pretty_print=False, xml_declaration=None, with_tail=True,
- standalone=None, compression=0,
+ standalone=None, doctype=None, compression=0,
exclusive=False, with_comments=True, inclusive_ns_prefixes=None)
Write the tree to a filename, file or file-like object.
@@ -1976,6 +1977,12 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
output an XML declaration with the corresponding
``standalone`` flag.
+ The ``doctype`` option allows passing in a plain string that will
+ be serialised before the XML tree. Note that passing in non
+ well-formed content here will make the XML output non well-formed.
+ Also, an existing doctype in the document tree will not be removed
+ when serialising an ElementTree instance.
+
The ``compression`` option enables GZip compression level 1-9.
The ``inclusive_ns_prefixes`` should be a list of namespace strings
@@ -2030,7 +2037,15 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
else:
write_declaration = 1
is_standalone = 0
- _tofilelike(file, self._context_node, encoding, docstring, method,
+
+ if docstring is not None and doctype is None:
+ import warnings
+ warnings.warn(
+ "The 'docstring' option is deprecated. Use 'doctype' instead.",
+ DeprecationWarning)
+ doctype = docstring
+
+ _tofilelike(file, self._context_node, encoding, doctype, method,
write_declaration, 1, pretty_print, with_tail,
is_standalone, compression)
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi
index 4b264a50..4ef53bc9 100644
--- a/src/lxml/serializer.pxi
+++ b/src/lxml/serializer.pxi
@@ -399,6 +399,208 @@ cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
pretty_print, encoding)
c_sibling = c_sibling.next
+
+# copied and adapted from libxml2
+cdef unsigned char *xmlSerializeHexCharRef(unsigned char *out, int val):
+ cdef xmlChar *ptr
+ cdef xmlChar c
+
+ out[0] = '&'
+ out += 1
+
+ out[0] = '#'
+ out += 1
+
+ out[0] = 'x'
+ out += 1
+
+ if (val < 0x10):
+ ptr = out
+ elif (val < 0x100):
+ ptr = out + 1
+ elif (val < 0x1000):
+ ptr = out + 2
+ elif (val < 0x10000):
+ ptr = out + 3
+ elif (val < 0x100000):
+ ptr = out + 4
+ else:
+ ptr = out + 5
+
+ out = ptr + 1
+ while val > 0:
+ c = (val & 0xF)
+
+ if c == 0:
+ ptr[0] = '0'
+ elif c == 1:
+ ptr[0] = '1'
+ elif c == 2:
+ ptr[0] = '2'
+ elif c == 3:
+ ptr[0] = '3'
+ elif c == 4:
+ ptr[0] = '4'
+ elif c == 5:
+ ptr[0] = '5'
+ elif c == 6:
+ ptr[0] = '6'
+ elif c == 7:
+ ptr[0] = '7'
+ elif c == 8:
+ ptr[0] = '8'
+ elif c == 9:
+ ptr[0] = '9'
+ elif c == 0xA:
+ ptr[0] = 'A'
+ elif c == 0xB:
+ ptr[0] = 'B'
+ elif c == 0xC:
+ ptr[0] = 'C'
+ elif c == 0xD:
+ ptr[0] = 'D'
+ elif c == 0xE:
+ ptr[0] = 'E'
+ elif c == 0xF:
+ ptr[0] = 'F'
+ else:
+ ptr[0] = '0'
+
+ ptr -= 1
+
+ val >>= 4
+
+ out[0] = ';'
+ out += 1
+ out[0] = 0
+
+ return out
+
+
+# copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent())
+cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
+ cdef const char *base
+ cdef const char *cur
+
+ cdef unsigned char tmp[12]
+ cdef int val = 0
+ cdef int l
+
+ if string == NULL:
+ return
+
+ base = cur = <const char*>string
+ while (cur[0] != 0):
+ if (cur[0] == '\n'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ tree.xmlOutputBufferWrite(buf, 5, "&#10;")
+ cur += 1
+ base = cur
+
+ elif (cur[0] == '\r'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ tree.xmlOutputBufferWrite(buf, 5, "&#13;")
+ cur += 1
+ base = cur
+
+ elif (cur[0] == '\t'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ tree.xmlOutputBufferWrite(buf, 4, "&#9;")
+ cur += 1
+ base = cur
+
+ elif (cur[0] == '"'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ tree.xmlOutputBufferWrite(buf, 6, "&quot;")
+ cur += 1
+ base = cur
+
+ elif (cur[0] == '<'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ tree.xmlOutputBufferWrite(buf, 4, "&lt;")
+ cur += 1
+ base = cur
+
+ elif (cur[0] == '>'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ tree.xmlOutputBufferWrite(buf, 4, "&gt;")
+ cur += 1
+ base = cur
+ elif (cur[0] == '&'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ tree.xmlOutputBufferWrite(buf, 5, "&amp;")
+ cur += 1
+ base = cur
+
+ elif (cur[0] >= 0x80) and (cur[1] != 0):
+
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+ if (cur[0] < 0xC0):
+ # invalid UTF-8 sequence
+ val = cur[0]
+ l = 1
+
+ elif (cur[0] < 0xE0):
+ val = (cur[0]) & 0x1F
+ val <<= 6
+ val |= (cur[1]) & 0x3F
+ l = 2
+
+ elif ((cur[0] < 0xF0) and (cur[2] != 0)):
+ val = (cur[0]) & 0x0F
+ val <<= 6
+ val |= (cur[1]) & 0x3F
+ val <<= 6
+ val |= (cur[2]) & 0x3F
+ l = 3
+
+ elif ((cur[0] < 0xF8) and (cur[2] != 0) and (cur[3] != 0)):
+ val = (cur[0]) & 0x07
+ val <<= 6
+ val |= (cur[1]) & 0x3F
+ val <<= 6
+ val |= (cur[2]) & 0x3F
+ val <<= 6
+ val |= (cur[3]) & 0x3F
+ l = 4
+ else:
+ # invalid UTF-8 sequence
+ val = cur[0]
+ l = 1
+
+ if ((l == 1) or (not tree.xmlIsCharQ(val))):
+ raise ValueError("Invalid character: %X" % val)
+
+ # We could do multiple things here. Just save
+ # as a char ref
+ xmlSerializeHexCharRef(tmp, val)
+ tree.xmlOutputBufferWrite(buf, -1, <const char*> tmp)
+ cur += l
+ base = cur
+
+ else:
+ cur += 1
+
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+
############################################################
# output to file-like objects
@@ -897,7 +1099,8 @@ cdef class _IncrementalFileWriter:
tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
self._write_qname(name, prefix)
tree.xmlOutputBufferWrite(self._c_out, 2, '="')
- tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(value), NULL)
+ _write_attr_string(self._c_out, _cstr(value))
+
tree.xmlOutputBufferWrite(self._c_out, 1, '"')
cdef _write_end_element(self, element_config):
@@ -969,14 +1172,12 @@ cdef class _IncrementalFileWriter:
raise LxmlSyntaxError("not in an element")
content = _utf8(content)
- if len(self._element_stack) > 0:
- ns, name, _, _ = self._element_stack[-1]
- else:
- ns, name = None, None
+ ns, name, _, _ = self._element_stack[-1]
+ if (c_method == OUTPUT_METHOD_HTML and
+ ns in (None, b'http://www.w3.org/1999/xhtml') and
+ name in (b'script', b'style')):
+ tree.xmlOutputBufferWrite(self._c_out, len(content), _cstr(content))
- if c_method == OUTPUT_METHOD_HTML and \
- ns in (None, 'http://www.w3.org/1999/xhtml') and name in ('script', 'style'):
- tree.xmlOutputBufferWrite(self._c_out, len(content), content)
else:
tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(content), NULL)
diff --git a/src/lxml/tests/common_imports.py b/src/lxml/tests/common_imports.py
index 85b1157d..4547d2d5 100644
--- a/src/lxml/tests/common_imports.py
+++ b/src/lxml/tests/common_imports.py
@@ -117,12 +117,15 @@ def _get_caller_relative_path(filename, frame_depth=2):
from io import StringIO
+unichr_escape = re.compile(r'\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}')
+
if sys.version_info[0] >= 3:
# Python 3
from builtins import str as unicode
+ from codecs import unicode_escape_decode
_chr = chr
def _str(s, encoding="UTF-8"):
- return s
+ return unichr_escape.sub(lambda x: unicode_escape_decode(x.group(0))[0], s)
def _bytes(s, encoding="UTF-8"):
return s.encode(encoding)
from io import BytesIO as _BytesIO
@@ -144,8 +147,6 @@ if sys.version_info[0] >= 3:
doctests, {}, os.path.basename(filename), filename, 0))
else:
# Python 2
- unichr_escape = re.compile(r'\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}')
-
from __builtin__ import unicode
_chr = unichr
def _str(s, encoding="UTF-8"):
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 4ec59096..d1c79e05 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -20,6 +20,7 @@ import tempfile
import textwrap
import zlib
import gzip
+from contextlib import closing, contextmanager
from .common_imports import etree, StringIO, BytesIO, HelperTestCase
from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
@@ -43,6 +44,16 @@ except NameError:
_unicode = str
+@contextmanager
+def tmpfile():
+ handle, filename = tempfile.mkstemp()
+ try:
+ yield filename
+ finally:
+ os.close(handle)
+ os.remove(filename)
+
+
class ETreeOnlyTestCase(HelperTestCase):
"""Tests only for etree, not ElementTree"""
etree = etree
@@ -4062,39 +4073,25 @@ class ETreeC14NTestCase(HelperTestCase):
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
f = BytesIO()
tree.write_c14n(f, compression=9)
- gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
- try:
+ with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
s = gzfile.read()
- finally:
- gzfile.close()
self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
s)
def test_c14n_file(self):
tree = self.parse(_bytes('<a><b/></a>'))
- handle, filename = tempfile.mkstemp()
- try:
+ with tmpfile() as filename:
tree.write_c14n(filename)
data = read_file(filename, 'rb')
- finally:
- os.close(handle)
- os.remove(filename)
self.assertEqual(_bytes('<a><b></b></a>'),
data)
def test_c14n_file_gzip(self):
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
- handle, filename = tempfile.mkstemp()
- try:
+ with tmpfile() as filename:
tree.write_c14n(filename, compression=9)
- f = gzip.open(filename, 'rb')
- try:
+ with closing(gzip.open(filename, 'rb')) as f:
data = f.read()
- finally:
- f.close()
- finally:
- os.close(handle)
- os.remove(filename)
self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
data)
@@ -4225,18 +4222,32 @@ class ETreeWriteTestCase(HelperTestCase):
self.assertEqual(_bytes('<a><b/></a>'),
s)
+ def test_write_doctype(self):
+ tree = self.parse(_bytes('<a><b/></a>'))
+ f = BytesIO()
+ tree.write(f, doctype='HUHU')
+ s = f.getvalue()
+ self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
+ s)
+
def test_write_gzip(self):
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
f = BytesIO()
tree.write(f, compression=9)
- gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
- try:
+ with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
s = gzfile.read()
- finally:
- gzfile.close()
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
s)
+ def test_write_gzip_doctype(self):
+ tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
+ f = BytesIO()
+ tree.write(f, compression=9, doctype='<!DOCTYPE a>')
+ with closing(gzip.GzipFile(fileobj=BytesIO(f.getvalue()))) as gzfile:
+ s = gzfile.read()
+ self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
+ s)
+
def test_write_gzip_level(self):
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
f = BytesIO()
@@ -4251,21 +4262,15 @@ class ETreeWriteTestCase(HelperTestCase):
tree.write(f, compression=1)
s = f.getvalue()
self.assertTrue(len(s) <= len(s0))
- gzfile = gzip.GzipFile(fileobj=BytesIO(s))
- try:
+ with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
s1 = gzfile.read()
- finally:
- gzfile.close()
f = BytesIO()
tree.write(f, compression=9)
s = f.getvalue()
self.assertTrue(len(s) <= len(s0))
- gzfile = gzip.GzipFile(fileobj=BytesIO(s))
- try:
+ with closing(gzip.GzipFile(fileobj=BytesIO(s))) as gzfile:
s9 = gzfile.read()
- finally:
- gzfile.close()
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
s0)
@@ -4276,57 +4281,39 @@ class ETreeWriteTestCase(HelperTestCase):
def test_write_file(self):
tree = self.parse(_bytes('<a><b/></a>'))
- handle, filename = tempfile.mkstemp()
- try:
+ with tmpfile() as filename:
tree.write(filename)
data = read_file(filename, 'rb')
- finally:
- os.close(handle)
- os.remove(filename)
self.assertEqual(_bytes('<a><b/></a>'),
data)
def test_write_file_gzip(self):
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
- handle, filename = tempfile.mkstemp()
- try:
+ with tmpfile() as filename:
tree.write(filename, compression=9)
- f = gzip.open(filename, 'rb')
- try:
+ with closing(gzip.open(filename, 'rb')) as f:
data = f.read()
- finally:
- f.close()
- finally:
- os.close(handle)
- os.remove(filename)
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
data)
def test_write_file_gzip_parse(self):
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
- handle, filename = tempfile.mkstemp()
- try:
+ with tmpfile() as filename:
tree.write(filename, compression=9)
data = etree.tostring(etree.parse(filename))
- finally:
- os.close(handle)
- os.remove(filename)
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
data)
def test_write_file_gzipfile_parse(self):
tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
- handle, filename = tempfile.mkstemp()
- try:
+ with tmpfile() as filename:
tree.write(filename, compression=9)
- data = etree.tostring(etree.parse(
- gzip.GzipFile(filename)))
- finally:
- os.close(handle)
- os.remove(filename)
+ with closing(gzip.GzipFile(filename)) as f:
+ data = etree.tostring(etree.parse(f))
self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
data)
+
class ETreeErrorLogTest(HelperTestCase):
etree = etree
@@ -4527,5 +4514,6 @@ def test_suite():
[make_doctest('../../../doc/resolvers.txt')])
return suite
+
if __name__ == '__main__':
print('to test use test.py %s' % __file__)
diff --git a/src/lxml/tests/test_incremental_xmlfile.py b/src/lxml/tests/test_incremental_xmlfile.py
index 81f49ac6..c2f162b2 100644
--- a/src/lxml/tests/test_incremental_xmlfile.py
+++ b/src/lxml/tests/test_incremental_xmlfile.py
@@ -2,11 +2,9 @@
"""
Tests for the incremental XML serialisation API.
-
-Tests require Python 2.5 or later.
"""
-from __future__ import with_statement
+from __future__ import with_statement, absolute_import
import unittest
import tempfile, os, sys
@@ -17,7 +15,8 @@ this_dir = os.path.dirname(__file__)
if this_dir not in sys.path:
sys.path.insert(0, this_dir) # needed for Py3
-from common_imports import etree, BytesIO, HelperTestCase, skipIf
+from .common_imports import etree, BytesIO, HelperTestCase, skipIf, _str
+
class _XmlFileTestCaseBase(HelperTestCase):
_file = None # to be set by specific subtypes below
@@ -454,6 +453,20 @@ class HtmlFileTestCase(_XmlFileTestCaseBase):
'</root>')
self._file = BytesIO()
+ def test_attribute_quoting(self):
+ with etree.htmlfile(self._file) as xf:
+ with xf.element("tagname", attrib={"attr": '"misquoted"'}):
+ xf.write("foo")
+
+ self.assertXml('<tagname attr="&quot;misquoted&quot;">foo</tagname>')
+
+ def test_attribute_quoting_unicode(self):
+ with etree.htmlfile(self._file) as xf:
+ with xf.element("tagname", attrib={"attr": _str('"misquöted\\u3344\\U00013344"')}):
+ xf.write("foo")
+
+ self.assertXml('<tagname attr="&quot;misqu&#246;ted&#13124;&#78660;&quot;">foo</tagname>')
+
def test_unescaped_script(self):
with etree.htmlfile(self._file) as xf:
elt = etree.Element('script')
diff --git a/tools/manylinux/build-wheels.sh b/tools/manylinux/build-wheels.sh
index 2ebf2503..308c38a9 100755
--- a/tools/manylinux/build-wheels.sh
+++ b/tools/manylinux/build-wheels.sh
@@ -35,6 +35,7 @@ prepare_system() {
#yum install -y zlib-devel
# Remove Python 2.6 symlinks
rm -f /opt/python/cp26*
+ echo "Python versions found: $(cd /opt/python && echo cp* | sed -e 's|[^ ]*-||g')"
}
build_wheels() {
@@ -64,7 +65,8 @@ repair_wheels() {
}
show_wheels() {
- ls -l $WHEELHOUSE
+ filename=${SDIST##*/}
+ ls -l $WHEELHOUSE/${filename%%.tar.gz}
}
prepare_system
diff --git a/version.txt b/version.txt
index a76ccff2..0b2eb36f 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-3.7.1
+3.7.2