summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2023-05-02 23:04:27 +0000
committermilde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2023-05-02 23:04:27 +0000
commit327493250f147bdb6e60a5cefafee46cdc4e8acb (patch)
tree5fff5ac653669b7cc646922c53286db97c7d10de
parent7457c3e1896cfe3807929a2e1a18531e02a05f20 (diff)
downloaddocutils-327493250f147bdb6e60a5cefafee46cdc4e8acb.tar.gz
Revert addition of `io.OutString` and the "auto_encode" argument.
We need a review of the "string output" interface and a consensus on the "clean" end-state before starting with the implementation. git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9369 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r--docutils/HISTORY.txt7
-rw-r--r--docutils/RELEASE-NOTES.txt11
-rw-r--r--docutils/docs/api/publisher.txt123
-rw-r--r--docutils/docs/user/config.txt2
-rw-r--r--docutils/docutils/core.py44
-rw-r--r--docutils/docutils/io.py95
-rwxr-xr-xdocutils/test/test_io.py59
-rwxr-xr-xdocutils/test/test_parsers/test_recommonmark/test_misc.py18
-rwxr-xr-xdocutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py20
-rwxr-xr-xdocutils/test/test_publisher.py29
-rwxr-xr-xdocutils/test/test_writers/test_html4css1_misc.py10
-rwxr-xr-xdocutils/test/test_writers/test_html4css1_template.py4
-rw-r--r--docutils/test/test_writers/test_html5_polyglot_misc.py6
-rw-r--r--docutils/test/test_writers/test_html5_template.py4
-rwxr-xr-xdocutils/test/test_writers/test_latex2e.py40
-rw-r--r--docutils/test/test_writers/test_manpage.py4
-rwxr-xr-xdocutils/test/test_writers/test_pseudoxml.py8
-rwxr-xr-xdocutils/test/test_writers/test_s5.py10
18 files changed, 160 insertions, 334 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt
index a55c7b815..48de19fdb 100644
--- a/docutils/HISTORY.txt
+++ b/docutils/HISTORY.txt
@@ -23,8 +23,6 @@ Release 0.20 (unpublished)
* docutils/core.py
- - New argument "auto_encode" for `publish_string()`,
- `publish_from_docstring()` and `publish_programmatically()`.
- New functions `rst2…()` for use as "console_scripts" `entry points`_.
(cf. `Future changes` in the RELEASE-NOTES_).
@@ -33,11 +31,6 @@ Release 0.20 (unpublished)
- New configuration setting output_. Obsoletes the ``<destination>``
positional argument (cf. `Future changes` in the RELEASE-NOTES_).
-* docutils/io.py
-
- - New `str` sub-class `io.OutString` with "encoding" and "errors"
- attributes.
-
* docutils/languages/
docutils/parsers/rst/languages/
diff --git a/docutils/RELEASE-NOTES.txt b/docutils/RELEASE-NOTES.txt
index 5d42f2738..6a5bfc35a 100644
--- a/docutils/RELEASE-NOTES.txt
+++ b/docutils/RELEASE-NOTES.txt
@@ -147,11 +147,6 @@ Drop support for Python 3.7 and 3.8 in Docutils 0.21.
* "null" writer: output will change to the empty string in Docutils 0.22.
-* The default value of the `auto_encode` argument of `core.publish_str()`,
- `core.publish_from_doctree()`, and `core.publish_programmatically()`
- will change to ``False`` in Docutils 0.22.
- The functions will then return a `str` object by default.
-
* Remove the "rawsource" argument from `nodes.Text.__init__()`
(deprecated and ignored since Docutils 0.18) in Docutils 2.0.
@@ -223,17 +218,11 @@ Release 0.20 (unpublished)
__ `command-line usage pattern`_
-* The new function argument `auto_encode` for Publisher convenience
- functions with `String I/O`_ selects whether the output document is
- encoded and returned as `bytes` instance. The default is ``True`` (for
- backwards compatibility) and will change to ``False`` in Docutils 0.22.
-
* `utils.find_file_in_dirs()` now returns a POSIX path also on Windows;
`utils.get_stylesheet_list()` no longer converts ``\`` to ``/``.
* Bugfixes and improvements (see HISTORY_).
-.. _String I/O: docs/api/publisher.html#string-i-o
.. _output: docs/user/config.html#output
diff --git a/docutils/docs/api/publisher.txt b/docutils/docs/api/publisher.txt
index 3ff5c4e6c..a31935879 100644
--- a/docutils/docs/api/publisher.txt
+++ b/docutils/docs/api/publisher.txt
@@ -16,9 +16,8 @@
The ``docutils.core.Publisher`` class is the core of Docutils,
managing all the processing and relationships between components. See
`PEP 258`_ for an overview of Docutils components.
-Configuration_ is done via `runtime settings`_ assembled from several sources.
-
-The `Publisher convenience functions`_ are the normal entry points for
+Configuration is done via `runtime settings`_ assembled from several sources.
+The *Publisher convenience functions* are the normal entry points for
using Docutils as a library.
.. _PEP 258: ../peps/pep-0258.html
@@ -42,14 +41,14 @@ a description of the function arguments.
publish_cmdline()
-----------------
-Function for command-line front-end tools, like ``rst2html.py``
-or functions for `"console_scripts" entry points`_ like `core.rst2html()`
-with file I/O.
-In addition to writing the output document to a file, also returns it as
-`str` instance (rsp. `bytes` for binary output document formats).
+Function for command-line front-end tools, like ``rst2html.py`` or
+`"console_scripts" entry points`_ like `core.rst2html()` with file I/O.
+In addition to writing the output document to a file-like object, also
+returns it as a `str` instance (or as `bytes` for binary output document
+formats).
There are several examples in the ``tools/`` directory of the Docutils
-repository. A detailed analysis of one such tool is in `Inside A Docutils
+repository. A detailed analysis of one such tool is `Inside A Docutils
Command-Line Front-End Tool`_.
.. _"console_scripts" entry points:
@@ -60,9 +59,9 @@ Command-Line Front-End Tool`_.
publish_file()
--------------
-For programmatic use with file-like I/O.
-In addition to writing the output document to a file, also returns it as
-`str` instance (rsp. `bytes` for binary output document formats).
+For programmatic use with file I/O. In addition to writing the output
+document to a file-like object, also returns it as a `str` instance
+(or as `bytes` for binary output document formats).
publish_string()
@@ -75,23 +74,23 @@ Input
`bytes` are decoded with input_encoding_.
Output
- is a memory object:
-
- * a `str` instance [#]_, if the "auto_encode" function argument is
- ``False`` or output_encoding_ is set to the special value
* a `bytes` instance, if output_encoding_ is set to an encoding
+ registered with Python's "codecs_" module (default: "utf-8"),
+ * a `str` instance, if output_encoding_ is set to the special value
``"unicode"``.
- * a `bytes` instance, if the "auto_encode" argument is ``True`` and
- output_encoding_ is set to an encoding registered with
- Python's "codecs_" module (default: "utf-8").
+.. Caution::
+ The "output_encoding" and "output_encoding_error_handler" `runtime
+ settings`_ may affect the content of the output document:
+ Some document formats contain an *encoding declaration*,
+ some formats use substitutions for non-encodable characters.
- Calling ``output = bytes(publish_string(…))`` ensures that ``output``
- is a `bytes` instance encoded with the configured output_encoding_
- (matching the encoding indicated inside HTML, XML, and LaTeX documents).
+ Use `publish_parts()`_ to get a `str` instance of the output document
+ as well as the values of the output_encoding_ and
+ output_encoding_error_handler_ runtime settings.
-.. [#] More precisely, an instance of a `str` sub-class with the
- output_encoding_ and output_encoding_error_handler_ configuration
- settings stored as "encoding" and "errors" attributes.
+*This function is provisional* because in Python 3 the name and behaviour
+no longer match.
.. _codecs: https://docs.python.org/3/library/codecs.html
@@ -110,15 +109,16 @@ publish_from_doctree()
Render from an existing `document tree`_ data structure (doctree).
Returns the output document as a memory object (cf. `string I/O`_).
+*This function is provisional* because in Python 3 the name and behaviour
+of the *string output* interface no longer match.
+
publish_programmatically()
--------------------------
Auxiliary function used by `publish_file()`_, `publish_string()`_,
-`publish_doctree()`_, and `publish_parts()`_.
-It returns a 2-tuple: the output document as memory object (cf. `string
-I/O`_) and the Publisher object.
-
+`publish_doctree()`_, and `publish_parts()`_.
+Applications should not need to call this function directly.
.. _publish-parts-details:
@@ -126,14 +126,24 @@ publish_parts()
---------------
For programmatic use with string input (cf. `string I/O`_).
-Returns a dictionary of document parts. Dictionary keys are the names of
-parts, and values are `str` instances; encoding is up to the client.
-Useful when only portions of the processed document are desired.
+Returns a dictionary of document parts as `str` instances. [#binary-output]_
+Dictionary keys are the part names.
+Each Writer component may publish a different set of document parts,
+described below.
-There are usage examples in the `docutils/examples.py`_ module.
+Example: post-process the output document with a custom function
+``post_process()`` before encoding with user-customizable encoding
+and errors ::
-Each Writer component may publish a different set of document parts,
-described below. Not all writers implement all parts.
+ def publish_bytes_with_postprocessing(*args, **kwargs):
+ parts = publish_parts(*args, **kwargs)
+ out_str = post_process(parts['whole'])
+ return out_str.encode(parts['encoding'], parts['errors'])
+
+There are more usage examples in the `docutils/examples.py`_ module.
+
+.. _docutils/examples.py: ../../docutils/examples.py
+.. _ODT: ../user/odt.html
Parts Provided By All Writers
@@ -141,7 +151,7 @@ Parts Provided By All Writers
_`encoding`
The `output_encoding`_ setting.
-
+
_`errors`
The `output_encoding_error_handler`_ setting.
@@ -149,7 +159,10 @@ _`version`
The version of Docutils used.
_`whole`
- ``parts['whole']`` contains the entire formatted document.
+ Contains the entire formatted document. [#binary-output]_
+
+ .. [#binary-output] Output documents in binary formats (e.g. ODT_)
+ are stored as a `bytes` instance.
Parts Provided By the HTML Writers
@@ -233,8 +246,8 @@ _`html_body`
_`html_head`
``parts['html_head']`` contains the HTML ``<head>`` content, less
the stylesheet link and the ``<head>`` and ``</head>`` tags
- themselves. Since ``publish_parts`` returns `str` instances and
- does not know about the output encoding, the "Content-Type" meta
+ themselves. Since `publish_parts()` returns `str` instances which
+ do not know about the output encoding, the "Content-Type" meta
tag's "charset" value is left unresolved, as "%s"::
<meta http-equiv="Content-Type" content="text/html; charset=%s" />
@@ -388,39 +401,42 @@ titledata
https://docutils.sourceforge.io/docutils/writers/latex2e/xelatex.tex
-.. _docutils/examples.py: ../../docutils/examples.py
-
+.. _runtime settings:
Configuration
=============
-Docutils is configured by runtime settings assembled from several
+Docutils is configured by *runtime settings* assembled from several
sources:
* *settings specifications* of the selected components (reader, parser,
writer),
-* *configuration files* (if enabled), and
+* the ``settings_overrides`` argument of the `Publisher convenience
+ functions`_ (see below),
+* *configuration files* (unless disabled), and
* *command-line options* (if enabled).
-The individual settings are described in `Docutils Configuration`_.
-
Docutils overlays default and explicitly specified values from these
sources such that settings behave the way we want and expect them to
behave. For details, see `Docutils Runtime Settings`_.
+The individual settings are described in `Docutils Configuration`_.
To pass application-specific setting defaults to the Publisher
convenience functions, use the ``settings_overrides`` parameter. Pass
a dictionary of setting names & values, like this::
- overrides = {'input_encoding': 'ascii',
- 'output_encoding': 'latin-1'}
- output = publish_string(..., settings_overrides=overrides)
+ app_defaults = {'input_encoding': 'ascii',
+ 'output_encoding': 'latin-1'}
+ output = publish_string(..., settings_overrides=app_defaults)
Settings from command-line options override configuration file
settings, and they override application defaults.
-Further customization is possible creating custom component
-objects and passing *them* to ``publish_*()`` or the ``Publisher``.
+See `Docutils Runtime Settings`_ or the docstring of
+`publish_programmatically()` for a description of all `configuration
+arguments`_ of the Publisher convenience functions.
+
+.. _configuration arguments: runtime-settings.html#convenience-functions
Encodings
@@ -453,7 +469,9 @@ The default **output encoding** is UTF-8.
A different encoding can be specified with the `output_encoding`_ setting.
.. Caution:: Docutils may introduce non-ASCII text if you use
- `auto-symbol footnotes`_ or the `"contents" directive`_.
+ `auto-symbol footnotes`_ or the `"contents" directive`_.
+ In non-English documents, also auto-generated labels
+ may contain non-ASCII characters.
.. [#magic-comment] A comment like ::
@@ -496,9 +514,8 @@ A different encoding can be specified with the `output_encoding`_ setting.
../ref/rst/restructuredtext.html#auto-symbol-footnotes
.. _"contents" directive:
../ref/rst/directives.html#table-of-contents
-.. _document tree:
+.. _document tree:
.. _Docutils document tree: ../ref/doctree.html
-.. _runtime settings:
.. _Docutils Runtime Settings: ./runtime-settings.html
.. _Docutils Configuration: ../user/config.html
.. _inspecting_codecs: https://codeberg.org/milde/inspecting-codecs
diff --git a/docutils/docs/user/config.txt b/docutils/docs/user/config.txt
index a6d5ed398..a09572912 100644
--- a/docutils/docs/user/config.txt
+++ b/docutils/docs/user/config.txt
@@ -476,7 +476,7 @@ record_dependencies
Path to a file where Docutils will write a list of files that were
required to generate the output, e.g. included files or embedded
-stylesheets [#dependencies]_. [#pwd]_ The format is one path per
+stylesheets. [#dependencies]_ [#pwd]_ The format is one path per
line with forward slashes as separator, the encoding is UTF-8.
Set to ``-`` in order to write dependencies to stdout.
diff --git a/docutils/docutils/core.py b/docutils/docutils/core.py
index 3f99526c9..ef89db986 100644
--- a/docutils/docutils/core.py
+++ b/docutils/docutils/core.py
@@ -439,27 +439,21 @@ def publish_string(source, source_path=None, destination_path=None,
writer=None, writer_name='pseudoxml',
settings=None, settings_spec=None,
settings_overrides=None, config_section=None,
- enable_exit_status=False,
- auto_encode=True):
+ enable_exit_status=False):
"""
Set up & run a `Publisher` for programmatic use with string I/O.
Accepts a `bytes` or `str` instance as `source`.
- If `auto_encode` is True, the output is encoded according to the
- `output_encoding`_ setting; the return value is a `bytes` instance
- (unless `output_encoding`_ is "unicode",
- cf. `docutils.io.StringOutput.write()`).
-
- If `auto_encode` is False, the output is an instance of a `str`
- sub-class with "output_encoding" and "output_encoding_error_handler"
- settings stored as `encoding` and `errors` attributes.
-
- The default value of `auto_encode` will change to ``False`` in
- Docutils 0.22.
+ The output is encoded according to the `output_encoding`_ setting;
+ the return value is a `bytes` instance (unless `output_encoding`_ is
+ "unicode", cf. `docutils.io.StringOutput.write()`).
Parameters: see `publish_programmatically()`.
+ This function is provisional because in Python 3 name and behaviour
+ no longer match.
+
.. _output_encoding:
https://docutils.sourceforge.io/docs/user/config.html#output-encoding
"""
@@ -473,8 +467,7 @@ def publish_string(source, source_path=None, destination_path=None,
settings=settings, settings_spec=settings_spec,
settings_overrides=settings_overrides,
config_section=config_section,
- enable_exit_status=enable_exit_status,
- auto_encode=auto_encode)
+ enable_exit_status=enable_exit_status)
return output
@@ -546,8 +539,7 @@ def publish_from_doctree(document, destination_path=None,
writer=None, writer_name='pseudoxml',
settings=None, settings_spec=None,
settings_overrides=None, config_section=None,
- enable_exit_status=False,
- auto_encode=True):
+ enable_exit_status=False):
"""
Set up & run a `Publisher` to render from an existing document tree
data structure. For programmatic use with string output
@@ -563,6 +555,9 @@ def publish_from_doctree(document, destination_path=None,
document tree.
Other parameters: see `publish_programmatically()`.
+
+ This function is provisional because in Python 3 name and behaviour
+ of the `io.StringOutput` class no longer match.
"""
reader = doctree.Reader(parser_name='null')
publisher = Publisher(reader, None, writer,
@@ -574,7 +569,6 @@ def publish_from_doctree(document, destination_path=None,
publisher.process_programmatic_settings(
settings_spec, settings_overrides, config_section)
publisher.set_destination(None, destination_path)
- publisher.destination.auto_encode = auto_encode
return publisher.publish(enable_exit_status=enable_exit_status)
@@ -623,12 +617,12 @@ def publish_programmatically(source_class, source, source_path,
writer, writer_name,
settings, settings_spec,
settings_overrides, config_section,
- enable_exit_status,
- auto_encode=True):
+ enable_exit_status):
"""
Set up & run a `Publisher` for custom programmatic use.
- Return the output (as `str` or `bytes`) and the Publisher object.
+ Return the output (as `str` or `bytes`, depending on `destination_class`,
+ writer, and the "output_encoding" setting) and the Publisher object.
Applications should not need to call this function directly. If it does
seem to be necessary to call this function directly, please write to the
@@ -716,12 +710,6 @@ def publish_programmatically(source_class, source, source_path,
defined by `settings_spec`. Used only if no `settings` specified.
* `enable_exit_status`: Boolean; enable exit status at end of processing?
-
- * `auto_encode`: Boolean; encode string output and return `bytes`?
- Ignored with `io.FileOutput`.
- New in Docutils 0.21.
- The default value will change to ``False`` in Docutils 0.22 or later.
- The argument may be removed in Docutils 2.0 or later.
"""
publisher = Publisher(reader, parser, writer, settings=settings,
source_class=source_class,
@@ -731,8 +719,6 @@ def publish_programmatically(source_class, source, source_path,
settings_spec, settings_overrides, config_section)
publisher.set_source(source, source_path)
publisher.set_destination(destination, destination_path)
- if isinstance(publisher.destination, io.StringOutput):
- publisher.destination.auto_encode = auto_encode
output = publisher.publish(enable_exit_status=enable_exit_status)
return output, publisher
diff --git a/docutils/docutils/io.py b/docutils/docutils/io.py
index 99f8b121a..a1cc5e44e 100644
--- a/docutils/docutils/io.py
+++ b/docutils/docutils/io.py
@@ -74,57 +74,6 @@ def error_string(err):
return f'{err.__class__.__name__}: {err}'
-class OutString(str):
- """Return a string representation of `object` with known encoding.
-
- Differences to `str()`:
-
- If the `encoding` is given, both `str` instances and byte-like objects
- are stored as text string, the latter decoded with `encoding` and
- `errors` (defaulting to 'strict').
-
- The encoding is never guessed. If `encoding` is None (the default),
- an informal string representation is used, also if `errors` are given.
-
- The original or intended encoding and error handler are stored in the
- attributes `encoding` and `errors`.
- Typecasting to `bytes` uses the stored values.
- """
-
- def __new__(cls, object, encoding=None, errors='strict'):
- """Return a new OutString object.
-
- Provisional.
- """
- try:
- # decode bytes-like objects if encoding is known
- return super().__new__(cls, object, encoding, errors)
- except TypeError:
- return super().__new__(cls, object)
-
- def __init__(self, object, encoding=None, errors='strict'):
- """Set "encoding" and "errors" attributes."""
- self.encoding = encoding
- self.errors = errors
-
- def __bytes__(self):
- try:
- return super().encode(self.encoding, self.errors)
- except TypeError:
- raise TypeError('OutString instance without known encoding')
-
- def __repr__(self):
- if self.errors != 'strict':
- errors_arg = f', errors={self.errors!r}'
- else:
- errors_arg = ''
- return (f'{self.__class__.__name__}({super().__repr__()}, '
- f'encoding={self.encoding!r}{errors_arg})')
-
- def encode(self, encoding=None, errors=None):
- return super().encode(encoding or self.encoding, errors or self.errors)
-
-
class Input(TransformSpec):
"""
Abstract base class for input wrappers.
@@ -652,53 +601,29 @@ class StringInput(Input):
class StringOutput(Output):
- """Output to a `bytes` or `str` instance."""
-
- default_destination_path = '<string>'
-
- def __init__(self, destination=None, destination_path=None,
- encoding=None, error_handler='strict', auto_encode=True):
- """Initialize self.
+ """Output to a `bytes` or `str` instance.
- `auto_encode` determines the return type of `self.write()`.
- Its default value will change to False in Docutils 0.22.
- Other attributes are passed to `Output.__init__()`.
- """
+ Provisional.
+ """
- self.auto_encode = auto_encode
- """Let `write()` encode the output document and return `bytes`."""
- super().__init__(destination, destination_path,
- encoding, error_handler)
+ default_destination_path = '<string>'
def write(self, data):
"""Store `data` in `self.destination`, and return it.
- If `self.auto_encode` is False, `data` must be a `str` instance
- and is stored/returned as `str` sub-class `OutString` with
- attributes "encoding" and "errors" set to `self.encoding`
- and `self.error_handler` respectively.
+ If `self.encoding` is set to the pseudo encoding name "unicode",
+ `data` must be a `str` instance and is stored/returned unchanged
+ (cf. `Output.encode`).
- If `self.auto_encode` is True, `data` can be a `bytes` or `str`
- instance and is stored/returned as a `bytes` instance
+ Otherwise, `data` can be a `bytes` or `str` instance and is
+ stored/returned as a `bytes` instance
(`str` data is encoded with `self.encode()`).
- Exception (provisional): If `self.encoding` is set to the pseudo
- encoding name "unicode", `data` must be a `str` instance and is
- stored/returned unchanged (cf. `Output.encode`).
Attention: the `output_encoding`_ setting may affect the content
of the output (e.g. an encoding declaration in HTML or XML or the
representation of characters as LaTeX macro vs. literal character).
"""
- if self.auto_encode:
- self.destination = self.encode(data)
- return self.destination
- if not isinstance(data, str):
- raise ValueError('StringOutput.write() expects `str` instance, '
- f'not {type(data)}.')
- encoding = self.encoding
- if not encoding or encoding.lower() == 'unicode':
- encoding = None
- self.destination = OutString(data, encoding, self.error_handler)
+ self.destination = self.encode(data)
return self.destination
diff --git a/docutils/test/test_io.py b/docutils/test/test_io.py
index 6c8c70254..b1e55a148 100755
--- a/docutils/test/test_io.py
+++ b/docutils/test/test_io.py
@@ -190,19 +190,6 @@ class OutputTests(unittest.TestCase):
fo.write(self.udata)
self.assertEqual(self.udrain.getvalue(), self.udata)
- def test_write_auto_encode_false(self):
- so = du_io.StringOutput(encoding='latin1', error_handler='replace',
- auto_encode=False)
- output = so.write(self.udata)
- # store output in self.destination and also return it
- self.assertEqual(output, self.udata)
- self.assertEqual(so.destination, self.udata)
- # store also encoding and encoding error handler ...
- self.assertEqual(output.encoding, 'latin1')
- self.assertEqual(output.errors, 'replace')
- # ... to allow easy conversion to `bytes`:
- self.assertEqual(bytes(output), self.bdata)
-
def test_FileOutput_hande_io_errors_deprection_warning(self):
with self.assertWarnsRegex(DeprecationWarning,
'"handle_io_errors" is ignored'):
@@ -238,52 +225,6 @@ class OutputTests(unittest.TestCase):
self.assertRaises(ValueError, fo.write, self.udata)
-class OutStringTests(unittest.TestCase):
-
- def test__init__defaults(self):
- """Test `__new__()` and `__init__()` with default values."""
-
- os = du_io.OutString('Grüße')
- self.assertEqual(str(os), 'Grüße')
- self.assertEqual(os.encoding, None)
- self.assertEqual(os.errors, 'strict')
- # converting to `bytes` fails if the encoding is not known:
- with self.assertRaises(TypeError):
- self.assertEqual(bytes(os), 'Grüße')
- # without known encoding, `bytes` and other incompatible types
- # are converted to their string representation ...
- bos = du_io.OutString(b'gut')
- self.assertEqual(str(bos), "b'gut'")
- bos_e = du_io.OutString('Grüße'.encode('latin1'), errors='ignore')
- self.assertEqual(str(bos_e), r"b'Gr\xfc\xdfe'")
- bos = du_io.OutString(b'gut', encoding=None)
- self.assertEqual(str(bos), "b'gut'")
-
- def test__init__custom_attributes(self):
- """Test `__new__()` and `__init__()` with custom encoding."""
- os8 = du_io.OutString('Grüße', encoding='utf-8')
- self.assertEqual(str(os8), 'Grüße')
- self.assertEqual(bytes(os8), b'Gr\xc3\xbc\xc3\x9fe')
- self.assertEqual(repr(os8), "OutString('Grüße', encoding='utf-8')")
- # With known encoding, "bytes-like" objects are decoded
- bos1 = du_io.OutString(b'Gr\xfc\xdfe', encoding='latin1')
- self.assertEqual(str(bos1), 'Grüße')
- self.assertEqual(bytes(bos1), b'Gr\xfc\xdfe')
- # Invalid encodings (including the empty string) raise an error
- with self.assertRaises(LookupError):
- du_io.OutString(b'Gr\xfc\xdfe', encoding='')
-
- def test__init__custom_errors(self):
- """Test `__new__()` and `__init__()` with custom `errors`."""
- ts8_r = du_io.OutString('Grüße', encoding='utf-8', errors='replace')
- # Encoding uses the stored error handler:
- self.assertEqual(ts8_r.encode('ascii'), b'Gr??e')
- # Initialization with a `bytes` object uses the error handler, too:
- bts8_r = du_io.OutString(b'Gr\xfc\xdfe', encoding='utf-8',
- errors='replace')
- self.assertEqual(str(bts8_r), 'Gr��e')
-
-
class ErrorOutputTests(unittest.TestCase):
def test_defaults(self):
e = du_io.ErrorOutput()
diff --git a/docutils/test/test_parsers/test_recommonmark/test_misc.py b/docutils/test/test_parsers/test_recommonmark/test_misc.py
index e0e2265b7..5ab7b9fd1 100755
--- a/docutils/test/test_parsers/test_recommonmark/test_misc.py
+++ b/docutils/test/test_parsers/test_recommonmark/test_misc.py
@@ -54,21 +54,19 @@ class RecommonmarkParserTests(unittest.TestCase):
settings_overrides={
'warning_stream': '',
'raw_enabled': False,
- },
- auto_encode=False)
- self.assertNotIn('<raw>', output)
- self.assertIn('<system_message', output)
- self.assertIn('Raw content disabled.', output)
+ })
+ self.assertNotIn(b'<raw>', output)
+ self.assertIn(b'<system_message', output)
+ self.assertIn(b'Raw content disabled.', output)
def test_raw_disabled_inline(self):
output = publish_string('foo <a href="uri">', parser=Parser(),
settings_overrides={'warning_stream': '',
'raw_enabled': False,
- },
- auto_encode=False)
- self.assertNotIn('<raw>', output)
- self.assertIn('<system_message', output)
- self.assertIn('Raw content disabled.', output)
+ })
+ self.assertNotIn(b'<raw>', output)
+ self.assertIn(b'<system_message', output)
+ self.assertIn(b'Raw content disabled.', output)
if __name__ == '__main__':
diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py b/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py
index 93c36dad0..007651d60 100755
--- a/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py
+++ b/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py
@@ -52,19 +52,17 @@ settings = {'warning_stream': ''}
class CodeParsingTests(unittest.TestCase):
def test_lexer_error(self):
- output = publish_string(unknown_language, settings_overrides=settings,
- auto_encode=False)
- self.assertIn('<system_message level="2"', output)
- self.assertIn('Cannot analyze code. '
- 'No Pygments lexer found for "s-lang".', output)
- self.assertIn('<literal_block xml:space="preserve">', output)
+ output = publish_string(unknown_language, settings_overrides=settings)
+ self.assertIn(b'<system_message level="2"', output)
+ self.assertIn(b'Cannot analyze code. '
+ b'No Pygments lexer found for "s-lang".', output)
+ self.assertIn(b'<literal_block xml:space="preserve">', output)
def test_lexer_error_workaround(self):
- output = publish_string(workaround, settings_overrides=settings,
- auto_encode=False)
- self.assertNotIn('<system_message', output)
- self.assertIn('<literal_block classes="code s-lang"', output)
- self.assertIn('autoload("abc_mode", "abc");', output)
+ output = publish_string(workaround, settings_overrides=settings)
+ self.assertNotIn(b'<system_message', output)
+ self.assertIn(b'<literal_block classes="code s-lang"', output)
+ self.assertIn(b'autoload("abc_mode", "abc");', output)
if __name__ == '__main__':
diff --git a/docutils/test/test_publisher.py b/docutils/test/test_publisher.py
index 16c0e0e31..a9da547d3 100755
--- a/docutils/test/test_publisher.py
+++ b/docutils/test/test_publisher.py
@@ -111,10 +111,9 @@ class PublisherTests(unittest.TestCase):
source = 'test → me'
expected = ('<document source="<string>">\n'
' <paragraph>\n'
- ' test → me\n')
+ ' test → me\n').encode('utf-8')
output = core.publish_string(source.encode('utf-16'),
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
self.assertEqual(output, expected)
# encoding declaration in source
@@ -122,8 +121,7 @@ class PublisherTests(unittest.TestCase):
# don't encode output (return `str`)
settings['output_encoding'] = 'unicode'
output = core.publish_string(source.encode('utf-16'),
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
self.assertTrue(output.endswith('Grüße\n'))
def test_publish_string_output_encoding(self):
@@ -134,24 +132,22 @@ class PublisherTests(unittest.TestCase):
expected = ('<document source="<string>">\n'
' <paragraph>\n'
' Grüß → dich\n')
- # current default: encode output, return `bytes`
+ # encode output, return `bytes`
output = bytes(core.publish_string(source,
settings_overrides=settings))
self.assertEqual(output, expected.encode('latin1', 'replace'))
- # no encoding if `auto_encode` is False:
- output = core.publish_string(source, settings_overrides=settings,
- auto_encode=False)
- self.assertEqual(output, expected)
- self.assertEqual(output.encoding, 'latin1')
def test_publish_string_output_encoding_odt(self):
"""The ODT writer generates a zip archive, not a `str`.
TODO: return `str` with document as "flat XML" (.fodt).
"""
- with self.assertRaises(ValueError) as cm:
- core.publish_string('test', writer_name='odt', auto_encode=False)
- self.assertIn('expects `str` instance', str(cm.exception))
+ settings = dict(self.settings)
+ settings['output_encoding'] = 'unicode'
+ with self.assertRaises(AssertionError) as cm:
+ core.publish_string('test', writer_name='odt',
+ settings_overrides=settings)
+ self.assertIn('`data` is no `str` instance', str(cm.exception))
class PublishDoctreeTestCase(unittest.TestCase, docutils.SettingsSpec):
@@ -234,9 +230,8 @@ class PublishDoctreeTestCase(unittest.TestCase, docutils.SettingsSpec):
# Write out the document:
output = core.publish_from_doctree(doctree_zombie,
writer_name='pseudoxml',
- settings_spec=self,
- auto_encode=False)
- self.assertEqual(output, pseudoxml_output)
+ settings_spec=self)
+ self.assertEqual(output.decode(), pseudoxml_output)
if __name__ == '__main__':
diff --git a/docutils/test/test_writers/test_html4css1_misc.py b/docutils/test/test_writers/test_html4css1_misc.py
index 7fa664d26..62dc7659b 100755
--- a/docutils/test/test_writers/test_html4css1_misc.py
+++ b/docutils/test/test_writers/test_html4css1_misc.py
@@ -61,9 +61,8 @@ second term:
second def
"""
result = core.publish_string(data, writer_name='html4css1',
- settings_overrides=self.mys,
- auto_encode=False)
- self.assertIn('<dt class="for the second item">second term:</dt>',
+ settings_overrides=self.mys)
+ self.assertIn(b'<dt class="for the second item">second term:</dt>',
result)
def test_definition_list_item_name(self):
@@ -79,9 +78,8 @@ second term:
second def
"""
result = core.publish_string(data, writer_name='html4css1',
- settings_overrides=self.mys,
- auto_encode=False)
- self.assertIn('<dt id="second-item">second term:</dt>',
+ settings_overrides=self.mys)
+ self.assertIn(b'<dt id="second-item">second term:</dt>',
result)
diff --git a/docutils/test/test_writers/test_html4css1_template.py b/docutils/test/test_writers/test_html4css1_template.py
index 842255495..adc42038f 100755
--- a/docutils/test/test_writers/test_html4css1_template.py
+++ b/docutils/test/test_writers/test_html4css1_template.py
@@ -43,9 +43,7 @@ class WriterPublishTestCase(unittest.TestCase):
'template': template_path,
'stylesheet_path': '/test.css',
'embed_stylesheet': False,
- },
- auto_encode=False,
- )
+ }).decode()
self.assertEqual(output, case_expected)
diff --git a/docutils/test/test_writers/test_html5_polyglot_misc.py b/docutils/test/test_writers/test_html5_polyglot_misc.py
index d242f3d85..933f61d4c 100644
--- a/docutils/test/test_writers/test_html5_polyglot_misc.py
+++ b/docutils/test/test_writers/test_html5_polyglot_misc.py
@@ -67,8 +67,7 @@ second term:
second def
"""
result = core.publish_string(data, writer_name='html5_polyglot',
- settings_overrides=self.mys,
- auto_encode=False)
+ settings_overrides=self.mys).decode()
self.assertIn('<dt class="for the second item">second term:</dt>',
result)
@@ -85,8 +84,7 @@ second term:
second def
"""
result = core.publish_string(data, writer_name='html5_polyglot',
- settings_overrides=self.mys,
- auto_encode=False)
+ settings_overrides=self.mys).decode()
self.assertIn('<dt id="second-item">second term:</dt>',
result)
diff --git a/docutils/test/test_writers/test_html5_template.py b/docutils/test/test_writers/test_html5_template.py
index 2b71e2d87..3f30b45b5 100644
--- a/docutils/test/test_writers/test_html5_template.py
+++ b/docutils/test/test_writers/test_html5_template.py
@@ -43,9 +43,7 @@ class WriterPublishTestCase(unittest.TestCase):
'template': template_path,
'stylesheet_path': '/test.css',
'embed_stylesheet': False,
- },
- auto_encode=False,
- )
+ }).decode()
self.assertEqual(output, case_expected)
diff --git a/docutils/test/test_writers/test_latex2e.py b/docutils/test/test_writers/test_latex2e.py
index 3ae4cd15c..d4ba94b3d 100755
--- a/docutils/test/test_writers/test_latex2e.py
+++ b/docutils/test/test_writers/test_latex2e.py
@@ -44,8 +44,8 @@ class WriterPublishTestCase(unittest.TestCase):
with self.subTest(id=f'samples_default[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=self.settings,
- auto_encode=False)
+ settings_overrides=self.settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_docutils_toc(self):
@@ -56,8 +56,8 @@ class WriterPublishTestCase(unittest.TestCase):
with self.subTest(id=f'samples_docutils_toc[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_book(self):
@@ -68,8 +68,8 @@ class WriterPublishTestCase(unittest.TestCase):
with self.subTest(id=f'samples_book[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_latex_sectnum(self):
@@ -82,8 +82,8 @@ class WriterPublishTestCase(unittest.TestCase):
id=f'samples_latex_sectnum[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_latex_citations(self):
@@ -95,8 +95,8 @@ class WriterPublishTestCase(unittest.TestCase):
id=f'samples_latex_citations[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_table_style_auto(self):
@@ -108,8 +108,8 @@ class WriterPublishTestCase(unittest.TestCase):
id=f'samples_table_style_auto[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_booktabs(self):
@@ -120,8 +120,8 @@ class WriterPublishTestCase(unittest.TestCase):
with self.subTest(id=f'samples_booktabs[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_link_stylesheet(self):
@@ -132,8 +132,8 @@ class WriterPublishTestCase(unittest.TestCase):
with self.subTest(id=f'samples_link_stylesheet[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_embed_embed_stylesheet(self):
@@ -146,8 +146,8 @@ class WriterPublishTestCase(unittest.TestCase):
with self.subTest(id=f'samples_embed_stylesheet[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
def test_bibtex(self):
@@ -158,8 +158,8 @@ class WriterPublishTestCase(unittest.TestCase):
with self.subTest(id=f'samples_bibtex[{name!r}][{casenum}]'):
output = publish_string(source=rst_input,
writer_name=self.writer_name,
- settings_overrides=settings,
- auto_encode=False)
+ settings_overrides=settings)
+ output = output.decode()
self.assertEqual(output, expected)
diff --git a/docutils/test/test_writers/test_manpage.py b/docutils/test/test_writers/test_manpage.py
index 2774d3345..4cbb426e6 100644
--- a/docutils/test/test_writers/test_manpage.py
+++ b/docutils/test/test_writers/test_manpage.py
@@ -31,9 +31,7 @@ class WriterPublishTestCase(unittest.TestCase):
settings_overrides={
'_disable_config': True,
'strict_visitor': True,
- },
- auto_encode=False,
- )
+ }).decode()
self.assertEqual(output, case_expected)
diff --git a/docutils/test/test_writers/test_pseudoxml.py b/docutils/test/test_writers/test_pseudoxml.py
index 9ef309228..58b504027 100755
--- a/docutils/test/test_writers/test_pseudoxml.py
+++ b/docutils/test/test_writers/test_pseudoxml.py
@@ -35,9 +35,7 @@ class WriterPublishTestCase(unittest.TestCase):
settings_overrides={
'_disable_config': True,
'strict_visitor': True,
- },
- auto_encode=False,
- )
+ }).decode()
self.assertEqual(output, case_expected)
for name, cases in totest_detailed.items():
@@ -50,9 +48,7 @@ class WriterPublishTestCase(unittest.TestCase):
'_disable_config': True,
'strict_visitor': True,
'detailed': True,
- },
- auto_encode=False,
- )
+ }).decode()
self.assertEqual(output, case_expected)
diff --git a/docutils/test/test_writers/test_s5.py b/docutils/test/test_writers/test_s5.py
index 9d7432d7e..3c1364f84 100755
--- a/docutils/test/test_writers/test_s5.py
+++ b/docutils/test/test_writers/test_s5.py
@@ -38,9 +38,8 @@ class WriterPublishTestCase(unittest.TestCase):
output = publish_string(
source=case_input,
writer_name=writer_name,
- settings_overrides=settings.copy(),
- auto_encode=False,
- )
+ settings_overrides=settings.copy()
+ ).decode()
self.assertEqual(output, case_expected)
settings['hidden_controls'] = False
@@ -51,9 +50,8 @@ class WriterPublishTestCase(unittest.TestCase):
output = publish_string(
source=case_input,
writer_name=writer_name,
- settings_overrides=settings.copy(),
- auto_encode=False,
- )
+ settings_overrides=settings.copy()
+ ).decode()
self.assertEqual(output, case_expected)