diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2023-05-02 23:04:27 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2023-05-02 23:04:27 +0000 |
commit | 327493250f147bdb6e60a5cefafee46cdc4e8acb (patch) | |
tree | 5fff5ac653669b7cc646922c53286db97c7d10de | |
parent | 7457c3e1896cfe3807929a2e1a18531e02a05f20 (diff) | |
download | docutils-327493250f147bdb6e60a5cefafee46cdc4e8acb.tar.gz |
Revert addition of `io.OutString` and the "auto_encode" argument.
We need a review of the "string output" interface and a consensus
on the "clean" end-state before starting with the implementation.
git-svn-id: https://svn.code.sf.net/p/docutils/code/trunk@9369 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- | docutils/HISTORY.txt | 7 | ||||
-rw-r--r-- | docutils/RELEASE-NOTES.txt | 11 | ||||
-rw-r--r-- | docutils/docs/api/publisher.txt | 123 | ||||
-rw-r--r-- | docutils/docs/user/config.txt | 2 | ||||
-rw-r--r-- | docutils/docutils/core.py | 44 | ||||
-rw-r--r-- | docutils/docutils/io.py | 95 | ||||
-rwxr-xr-x | docutils/test/test_io.py | 59 | ||||
-rwxr-xr-x | docutils/test/test_parsers/test_recommonmark/test_misc.py | 18 | ||||
-rwxr-xr-x | docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py | 20 | ||||
-rwxr-xr-x | docutils/test/test_publisher.py | 29 | ||||
-rwxr-xr-x | docutils/test/test_writers/test_html4css1_misc.py | 10 | ||||
-rwxr-xr-x | docutils/test/test_writers/test_html4css1_template.py | 4 | ||||
-rw-r--r-- | docutils/test/test_writers/test_html5_polyglot_misc.py | 6 | ||||
-rw-r--r-- | docutils/test/test_writers/test_html5_template.py | 4 | ||||
-rwxr-xr-x | docutils/test/test_writers/test_latex2e.py | 40 | ||||
-rw-r--r-- | docutils/test/test_writers/test_manpage.py | 4 | ||||
-rwxr-xr-x | docutils/test/test_writers/test_pseudoxml.py | 8 | ||||
-rwxr-xr-x | docutils/test/test_writers/test_s5.py | 10 |
18 files changed, 160 insertions, 334 deletions
diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt index a55c7b815..48de19fdb 100644 --- a/docutils/HISTORY.txt +++ b/docutils/HISTORY.txt @@ -23,8 +23,6 @@ Release 0.20 (unpublished) * docutils/core.py - - New argument "auto_encode" for `publish_string()`, - `publish_from_docstring()` and `publish_programmatically()`. - New functions `rst2…()` for use as "console_scripts" `entry points`_. (cf. `Future changes` in the RELEASE-NOTES_). @@ -33,11 +31,6 @@ Release 0.20 (unpublished) - New configuration setting output_. Obsoletes the ``<destination>`` positional argument (cf. `Future changes` in the RELEASE-NOTES_). -* docutils/io.py - - - New `str` sub-class `io.OutString` with "encoding" and "errors" - attributes. - * docutils/languages/ docutils/parsers/rst/languages/ diff --git a/docutils/RELEASE-NOTES.txt b/docutils/RELEASE-NOTES.txt index 5d42f2738..6a5bfc35a 100644 --- a/docutils/RELEASE-NOTES.txt +++ b/docutils/RELEASE-NOTES.txt @@ -147,11 +147,6 @@ Drop support for Python 3.7 and 3.8 in Docutils 0.21. * "null" writer: output will change to the empty string in Docutils 0.22. -* The default value of the `auto_encode` argument of `core.publish_str()`, - `core.publish_from_doctree()`, and `core.publish_programmatically()` - will change to ``False`` in Docutils 0.22. - The functions will then return a `str` object by default. - * Remove the "rawsource" argument from `nodes.Text.__init__()` (deprecated and ignored since Docutils 0.18) in Docutils 2.0. @@ -223,17 +218,11 @@ Release 0.20 (unpublished) __ `command-line usage pattern`_ -* The new function argument `auto_encode` for Publisher convenience - functions with `String I/O`_ selects whether the output document is - encoded and returned as `bytes` instance. The default is ``True`` (for - backwards compatibility) and will change to ``False`` in Docutils 0.22. - * `utils.find_file_in_dirs()` now returns a POSIX path also on Windows; `utils.get_stylesheet_list()` no longer converts ``\`` to ``/``. 
* Bugfixes and improvements (see HISTORY_). -.. _String I/O: docs/api/publisher.html#string-i-o .. _output: docs/user/config.html#output diff --git a/docutils/docs/api/publisher.txt b/docutils/docs/api/publisher.txt index 3ff5c4e6c..a31935879 100644 --- a/docutils/docs/api/publisher.txt +++ b/docutils/docs/api/publisher.txt @@ -16,9 +16,8 @@ The ``docutils.core.Publisher`` class is the core of Docutils, managing all the processing and relationships between components. See `PEP 258`_ for an overview of Docutils components. -Configuration_ is done via `runtime settings`_ assembled from several sources. - -The `Publisher convenience functions`_ are the normal entry points for +Configuration is done via `runtime settings`_ assembled from several sources. +The *Publisher convenience functions* are the normal entry points for using Docutils as a library. .. _PEP 258: ../peps/pep-0258.html @@ -42,14 +41,14 @@ a description of the function arguments. publish_cmdline() ----------------- -Function for command-line front-end tools, like ``rst2html.py`` -or functions for `"console_scripts" entry points`_ like `core.rst2html()` -with file I/O. -In addition to writing the output document to a file, also returns it as -`str` instance (rsp. `bytes` for binary output document formats). +Function for command-line front-end tools, like ``rst2html.py`` or +`"console_scripts" entry points`_ like `core.rst2html()` with file I/O. +In addition to writing the output document to a file-like object, also +returns it as `str` instance (rsp. `bytes` for binary output document +formats). There are several examples in the ``tools/`` directory of the Docutils -repository. A detailed analysis of one such tool is in `Inside A Docutils +repository. A detailed analysis of one such tool is `Inside A Docutils Command-Line Front-End Tool`_. .. _"console_scripts" entry points: @@ -60,9 +59,9 @@ Command-Line Front-End Tool`_. publish_file() -------------- -For programmatic use with file-like I/O. 
-In addition to writing the output document to a file, also returns it as -`str` instance (rsp. `bytes` for binary output document formats). +For programmatic use with file I/O. In addition to writing the output +document to a file-like object, also returns it as `str` instance +(rsp. `bytes` for binary output document formats). publish_string() @@ -75,23 +74,23 @@ Input `bytes` are decoded with input_encoding_. Output - is a memory object: - - * a `str` instance [#]_, if the "auto_encode" function argument is - ``False`` or output_encoding_ is set to the special value + * is a `bytes` instance, if output_encoding_ is set to an encoding + registered with Python's "codecs_" module (default: "utf-8"), + * a `str` instance, if output_encoding_ is set to the special value ``"unicode"``. - * a `bytes` instance, if the "auto_encode" argument is ``True`` and - output_encoding_ is set to an encoding registered with - Python's "codecs_" module (default: "utf-8"). +.. Caution:: + The "output_encoding" and "output_encoding_error_handler" `runtime + settings`_ may affect the content of the output document: + Some document formats contain an *encoding declaration*, + some formats use substitutions for non-encodable characters. - Calling ``output = bytes(publish_string(…))`` ensures that ``output`` - is a `bytes` instance encoded with the configured output_encoding_ - (matching the encoding indicated inside HTML, XML, and LaTeX documents). + Use `publish_parts()`_ to get a `str` instance of the output document + as well as the values of the output_encoding_ and + output_encoding_error_handler_ runtime settings. -.. [#] More precisely, an instance of a `str` sub-class with the - output_encoding_ and output_encoding_error_handler_ configuration - settings stored as "encoding" and "errors" attributes. +*This function is provisional* because in Python 3 the name and behaviour +no longer match. .. 
_codecs: https://docs.python.org/3/library/codecs.html @@ -110,15 +109,16 @@ publish_from_doctree() Render from an existing `document tree`_ data structure (doctree). Returns the output document as a memory object (cf. `string I/O`_). +*This function is provisional* because in Python 3 the name and behaviour +of the *string output* interface no longer match. + publish_programmatically() -------------------------- Auxiliary function used by `publish_file()`_, `publish_string()`_, -`publish_doctree()`_, and `publish_parts()`_. -It returns a 2-tuple: the output document as memory object (cf. `string -I/O`_) and the Publisher object. - +`publish_doctree()`_, and `publish_parts()`_. +Applications should not need to call this function directly. .. _publish-parts-details: @@ -126,14 +126,24 @@ publish_parts() --------------- For programmatic use with string input (cf. `string I/O`_). -Returns a dictionary of document parts. Dictionary keys are the names of -parts, and values are `str` instances; encoding is up to the client. -Useful when only portions of the processed document are desired. +Returns a dictionary of document parts as `str` instances. [#binary-output]_ +Dictionary keys are the part names. +Each Writer component may publish a different set of document parts, +described below. -There are usage examples in the `docutils/examples.py`_ module. +Example: post-process the output document with a custom function +``post_process()`` before encoding with user-customizable encoding +and errors :: -Each Writer component may publish a different set of document parts, -described below. Not all writers implement all parts. + def publish_bytes_with_postprocessing(*args, **kwargs): + parts = publish_parts(*args, **kwargs) + out_str = post_process(parts['whole']) + return out_str.encode(parts['encoding'], parts['errors']) + +There are more usage examples in the `docutils/examples.py`_ module. + +.. _docutils/examples.py: ../../docutils/examples.py +.. 
_ODT: ../user/odt.html Parts Provided By All Writers @@ -141,7 +151,7 @@ Parts Provided By All Writers _`encoding` The `output_encoding`_ setting. - + _`errors` The `output_encoding_error_handler`_ setting. @@ -149,7 +159,10 @@ _`version` The version of Docutils used. _`whole` - ``parts['whole']`` contains the entire formatted document. + Contains the entire formatted document. [#binary-output]_ + + .. [#binary-output] Output documents in binary formats (e.g. ODT_) + are stored as a `bytes` instance. Parts Provided By the HTML Writers @@ -233,8 +246,8 @@ _`html_body` _`html_head` ``parts['html_head']`` contains the HTML ``<head>`` content, less the stylesheet link and the ``<head>`` and ``</head>`` tags - themselves. Since ``publish_parts`` returns `str` instances and - does not know about the output encoding, the "Content-Type" meta + themselves. Since `publish_parts()` returns `str` instances which + do not know about the output encoding, the "Content-Type" meta tag's "charset" value is left unresolved, as "%s":: <meta http-equiv="Content-Type" content="text/html; charset=%s" /> @@ -388,39 +401,42 @@ titledata https://docutils.sourceforge.io/docutils/writers/latex2e/xelatex.tex -.. _docutils/examples.py: ../../docutils/examples.py - +.. _runtime settings: Configuration ============= -Docutils is configured by runtime settings assembled from several +Docutils is configured by *runtime settings* assembled from several sources: * *settings specifications* of the selected components (reader, parser, writer), -* *configuration files* (if enabled), and +* the ``settings_overrides`` argument of the `Publisher convenience + functions`_ (see below), +* *configuration files* (unless disabled), and * *command-line options* (if enabled). -The individual settings are described in `Docutils Configuration`_. - Docutils overlays default and explicitly specified values from these sources such that settings behave the way we want and expect them to behave. 
For details, see `Docutils Runtime Settings`_. +The individual settings are described in `Docutils Configuration`_. To pass application-specific setting defaults to the Publisher convenience functions, use the ``settings_overrides`` parameter. Pass a dictionary of setting names & values, like this:: - overrides = {'input_encoding': 'ascii', - 'output_encoding': 'latin-1'} - output = publish_string(..., settings_overrides=overrides) + app_defaults = {'input_encoding': 'ascii', + 'output_encoding': 'latin-1'} + output = publish_string(..., settings_overrides=app_defaults) Settings from command-line options override configuration file settings, and they override application defaults. -Further customization is possible creating custom component -objects and passing *them* to ``publish_*()`` or the ``Publisher``. +See `Docutils Runtime Settings`_ or the docstring of +`publish_programmatically()` for a description of all `configuration +arguments`_ of the Publisher convenience functions. + +.. _configuration arguments: runtime-settings.html#convenience-functions Encodings @@ -453,7 +469,9 @@ The default **output encoding** is UTF-8. A different encoding can be specified with the `output_encoding`_ setting. .. Caution:: Docutils may introduce non-ASCII text if you use - `auto-symbol footnotes`_ or the `"contents" directive`_. + `auto-symbol footnotes`_ or the `"contents" directive`_. + In non-English documents, also auto-generated labels + may contain non-ASCII characters. .. [#magic-comment] A comment like :: @@ -496,9 +514,8 @@ A different encoding can be specified with the `output_encoding`_ setting. ../ref/rst/restructuredtext.html#auto-symbol-footnotes .. _"contents" directive: ../ref/rst/directives.html#table-of-contents -.. _document tree: +.. _document tree: .. _Docutils document tree: ../ref/doctree.html -.. _runtime settings: .. _Docutils Runtime Settings: ./runtime-settings.html .. _Docutils Configuration: ../user/config.html .. 
_inspecting_codecs: https://codeberg.org/milde/inspecting-codecs diff --git a/docutils/docs/user/config.txt b/docutils/docs/user/config.txt index a6d5ed398..a09572912 100644 --- a/docutils/docs/user/config.txt +++ b/docutils/docs/user/config.txt @@ -476,7 +476,7 @@ record_dependencies Path to a file where Docutils will write a list of files that were required to generate the output, e.g. included files or embedded -stylesheets [#dependencies]_. [#pwd]_ The format is one path per +stylesheets. [#dependencies]_ [#pwd]_ The format is one path per line with forward slashes as separator, the encoding is UTF-8. Set to ``-`` in order to write dependencies to stdout. diff --git a/docutils/docutils/core.py b/docutils/docutils/core.py index 3f99526c9..ef89db986 100644 --- a/docutils/docutils/core.py +++ b/docutils/docutils/core.py @@ -439,27 +439,21 @@ def publish_string(source, source_path=None, destination_path=None, writer=None, writer_name='pseudoxml', settings=None, settings_spec=None, settings_overrides=None, config_section=None, - enable_exit_status=False, - auto_encode=True): + enable_exit_status=False): """ Set up & run a `Publisher` for programmatic use with string I/O. Accepts a `bytes` or `str` instance as `source`. - If `auto_encode` is True, the output is encoded according to the - `output_encoding`_ setting; the return value is a `bytes` instance - (unless `output_encoding`_ is "unicode", - cf. `docutils.io.StringOutput.write()`). - - If `auto_encode` is False, the output is an instance of a `str` - sub-class with "output_encoding" and "output_encoding_error_handler" - settings stored as `encoding` and `errors` attributes. - - The default value of `auto_encode` will change to ``False`` in - Docutils 0.22. + The output is encoded according to the `output_encoding`_ setting; + the return value is a `bytes` instance (unless `output_encoding`_ is + "unicode", cf. `docutils.io.StringOutput.write()`). Parameters: see `publish_programmatically()`. 
+ This function is provisional because in Python 3 name and behaviour + no longer match. + .. _output_encoding: https://docutils.sourceforge.io/docs/user/config.html#output-encoding """ @@ -473,8 +467,7 @@ def publish_string(source, source_path=None, destination_path=None, settings=settings, settings_spec=settings_spec, settings_overrides=settings_overrides, config_section=config_section, - enable_exit_status=enable_exit_status, - auto_encode=auto_encode) + enable_exit_status=enable_exit_status) return output @@ -546,8 +539,7 @@ def publish_from_doctree(document, destination_path=None, writer=None, writer_name='pseudoxml', settings=None, settings_spec=None, settings_overrides=None, config_section=None, - enable_exit_status=False, - auto_encode=True): + enable_exit_status=False): """ Set up & run a `Publisher` to render from an existing document tree data structure. For programmatic use with string output @@ -563,6 +555,9 @@ def publish_from_doctree(document, destination_path=None, document tree. Other parameters: see `publish_programmatically()`. + + This function is provisional because in Python 3 name and behaviour + of the `io.StringOutput` class no longer match. """ reader = doctree.Reader(parser_name='null') publisher = Publisher(reader, None, writer, @@ -574,7 +569,6 @@ def publish_from_doctree(document, destination_path=None, publisher.process_programmatic_settings( settings_spec, settings_overrides, config_section) publisher.set_destination(None, destination_path) - publisher.destination.auto_encode = auto_encode return publisher.publish(enable_exit_status=enable_exit_status) @@ -623,12 +617,12 @@ def publish_programmatically(source_class, source, source_path, writer, writer_name, settings, settings_spec, settings_overrides, config_section, - enable_exit_status, - auto_encode=True): + enable_exit_status): """ Set up & run a `Publisher` for custom programmatic use. - Return the output (as `str` or `bytes`) and the Publisher object. 
+ Return the output (as `str` or `bytes`, depending on `destination_class`, + writer, and the "output_encoding" setting) and the Publisher object. Applications should not need to call this function directly. If it does seem to be necessary to call this function directly, please write to the @@ -716,12 +710,6 @@ def publish_programmatically(source_class, source, source_path, defined by `settings_spec`. Used only if no `settings` specified. * `enable_exit_status`: Boolean; enable exit status at end of processing? - - * `auto_encode`: Boolean; encode string output and return `bytes`? - Ignored with `io.FileOutput`. - New in Docutils 0.21. - The default value will change to ``False`` in Docutils 0.22 or later. - The argument may be removed in Docutils 2.0 or later. """ publisher = Publisher(reader, parser, writer, settings=settings, source_class=source_class, @@ -731,8 +719,6 @@ def publish_programmatically(source_class, source, source_path, settings_spec, settings_overrides, config_section) publisher.set_source(source, source_path) publisher.set_destination(destination, destination_path) - if isinstance(publisher.destination, io.StringOutput): - publisher.destination.auto_encode = auto_encode output = publisher.publish(enable_exit_status=enable_exit_status) return output, publisher diff --git a/docutils/docutils/io.py b/docutils/docutils/io.py index 99f8b121a..a1cc5e44e 100644 --- a/docutils/docutils/io.py +++ b/docutils/docutils/io.py @@ -74,57 +74,6 @@ def error_string(err): return f'{err.__class__.__name__}: {err}' -class OutString(str): - """Return a string representation of `object` with known encoding. - - Differences to `str()`: - - If the `encoding` is given, both `str` instances and byte-like objects - are stored as text string, the latter decoded with `encoding` and - `errors` (defaulting to 'strict'). - - The encoding is never guessed. If `encoding` is None (the default), - an informal string representation is used, also if `errors` are given. 
- - The original or intended encoding and error handler are stored in the - attributes `encoding` and `errors`. - Typecasting to `bytes` uses the stored values. - """ - - def __new__(cls, object, encoding=None, errors='strict'): - """Return a new OutString object. - - Provisional. - """ - try: - # decode bytes-like objects if encoding is known - return super().__new__(cls, object, encoding, errors) - except TypeError: - return super().__new__(cls, object) - - def __init__(self, object, encoding=None, errors='strict'): - """Set "encoding" and "errors" attributes.""" - self.encoding = encoding - self.errors = errors - - def __bytes__(self): - try: - return super().encode(self.encoding, self.errors) - except TypeError: - raise TypeError('OutString instance without known encoding') - - def __repr__(self): - if self.errors != 'strict': - errors_arg = f', errors={self.errors!r}' - else: - errors_arg = '' - return (f'{self.__class__.__name__}({super().__repr__()}, ' - f'encoding={self.encoding!r}{errors_arg})') - - def encode(self, encoding=None, errors=None): - return super().encode(encoding or self.encoding, errors or self.errors) - - class Input(TransformSpec): """ Abstract base class for input wrappers. @@ -652,53 +601,29 @@ class StringInput(Input): class StringOutput(Output): - """Output to a `bytes` or `str` instance.""" - - default_destination_path = '<string>' - - def __init__(self, destination=None, destination_path=None, - encoding=None, error_handler='strict', auto_encode=True): - """Initialize self. + """Output to a `bytes` or `str` instance. - `auto_encode` determines the return type of `self.write()`. - Its default value will change to False in Docutils 0.22. - Other attributes are passed to `Output.__init__()`. - """ + Provisional. 
+ """ - self.auto_encode = auto_encode - """Let `write()` encode the output document and return `bytes`.""" - super().__init__(destination, destination_path, - encoding, error_handler) + default_destination_path = '<string>' def write(self, data): """Store `data` in `self.destination`, and return it. - If `self.auto_encode` is False, `data` must be a `str` instance - and is stored/returned as `str` sub-class `OutString` with - attributes "encoding" and "errors" set to `self.encoding` - and `self.error_handler` respectively. + If `self.encoding` is set to the pseudo encoding name "unicode", + `data` must be a `str` instance and is stored/returned unchanged + (cf. `Output.encode`). - If `self.auto_encode` is True, `data` can be a `bytes` or `str` - instance and is stored/returned as a `bytes` instance + Otherwise, `data` can be a `bytes` or `str` instance and is + stored/returned as a `bytes` instance (`str` data is encoded with `self.encode()`). - Exception (provisional): If `self.encoding` is set to the pseudo - encoding name "unicode", `data` must be a `str` instance and is - stored/returned unchanged (cf. `Output.encode`). Attention: the `output_encoding`_ setting may affect the content of the output (e.g. an encoding declaration in HTML or XML or the representation of characters as LaTeX macro vs. literal character). 
""" - if self.auto_encode: - self.destination = self.encode(data) - return self.destination - if not isinstance(data, str): - raise ValueError('StringOutput.write() expects `str` instance, ' - f'not {type(data)}.') - encoding = self.encoding - if not encoding or encoding.lower() == 'unicode': - encoding = None - self.destination = OutString(data, encoding, self.error_handler) + self.destination = self.encode(data) return self.destination diff --git a/docutils/test/test_io.py b/docutils/test/test_io.py index 6c8c70254..b1e55a148 100755 --- a/docutils/test/test_io.py +++ b/docutils/test/test_io.py @@ -190,19 +190,6 @@ class OutputTests(unittest.TestCase): fo.write(self.udata) self.assertEqual(self.udrain.getvalue(), self.udata) - def test_write_auto_encode_false(self): - so = du_io.StringOutput(encoding='latin1', error_handler='replace', - auto_encode=False) - output = so.write(self.udata) - # store output in self.destination and also return it - self.assertEqual(output, self.udata) - self.assertEqual(so.destination, self.udata) - # store also encoding and encoding error handler ... - self.assertEqual(output.encoding, 'latin1') - self.assertEqual(output.errors, 'replace') - # ... 
to allow easy conversion to `bytes`: - self.assertEqual(bytes(output), self.bdata) - def test_FileOutput_hande_io_errors_deprection_warning(self): with self.assertWarnsRegex(DeprecationWarning, '"handle_io_errors" is ignored'): @@ -238,52 +225,6 @@ class OutputTests(unittest.TestCase): self.assertRaises(ValueError, fo.write, self.udata) -class OutStringTests(unittest.TestCase): - - def test__init__defaults(self): - """Test `__new__()` and `__init__()` with default values.""" - - os = du_io.OutString('Grüße') - self.assertEqual(str(os), 'Grüße') - self.assertEqual(os.encoding, None) - self.assertEqual(os.errors, 'strict') - # converting to `bytes` fails if the encoding is not known: - with self.assertRaises(TypeError): - self.assertEqual(bytes(os), 'Grüße') - # without known encoding, `bytes` and other incompatible types - # are converted to their string representation ... - bos = du_io.OutString(b'gut') - self.assertEqual(str(bos), "b'gut'") - bos_e = du_io.OutString('Grüße'.encode('latin1'), errors='ignore') - self.assertEqual(str(bos_e), r"b'Gr\xfc\xdfe'") - bos = du_io.OutString(b'gut', encoding=None) - self.assertEqual(str(bos), "b'gut'") - - def test__init__custom_attributes(self): - """Test `__new__()` and `__init__()` with custom encoding.""" - os8 = du_io.OutString('Grüße', encoding='utf-8') - self.assertEqual(str(os8), 'Grüße') - self.assertEqual(bytes(os8), b'Gr\xc3\xbc\xc3\x9fe') - self.assertEqual(repr(os8), "OutString('Grüße', encoding='utf-8')") - # With known encoding, "bytes-like" objects are decoded - bos1 = du_io.OutString(b'Gr\xfc\xdfe', encoding='latin1') - self.assertEqual(str(bos1), 'Grüße') - self.assertEqual(bytes(bos1), b'Gr\xfc\xdfe') - # Invalid encodings (including the empty string) raise an error - with self.assertRaises(LookupError): - du_io.OutString(b'Gr\xfc\xdfe', encoding='') - - def test__init__custom_errors(self): - """Test `__new__()` and `__init__()` with custom `errors`.""" - ts8_r = du_io.OutString('Grüße', encoding='utf-8', 
errors='replace') - # Encoding uses the stored error handler: - self.assertEqual(ts8_r.encode('ascii'), b'Gr??e') - # Initialization with a `bytes` object uses the error handler, too: - bts8_r = du_io.OutString(b'Gr\xfc\xdfe', encoding='utf-8', - errors='replace') - self.assertEqual(str(bts8_r), 'Gr��e') - - class ErrorOutputTests(unittest.TestCase): def test_defaults(self): e = du_io.ErrorOutput() diff --git a/docutils/test/test_parsers/test_recommonmark/test_misc.py b/docutils/test/test_parsers/test_recommonmark/test_misc.py index e0e2265b7..5ab7b9fd1 100755 --- a/docutils/test/test_parsers/test_recommonmark/test_misc.py +++ b/docutils/test/test_parsers/test_recommonmark/test_misc.py @@ -54,21 +54,19 @@ class RecommonmarkParserTests(unittest.TestCase): settings_overrides={ 'warning_stream': '', 'raw_enabled': False, - }, - auto_encode=False) - self.assertNotIn('<raw>', output) - self.assertIn('<system_message', output) - self.assertIn('Raw content disabled.', output) + }) + self.assertNotIn(b'<raw>', output) + self.assertIn(b'<system_message', output) + self.assertIn(b'Raw content disabled.', output) def test_raw_disabled_inline(self): output = publish_string('foo <a href="uri">', parser=Parser(), settings_overrides={'warning_stream': '', 'raw_enabled': False, - }, - auto_encode=False) - self.assertNotIn('<raw>', output) - self.assertIn('<system_message', output) - self.assertIn('Raw content disabled.', output) + }) + self.assertNotIn(b'<raw>', output) + self.assertIn(b'<system_message', output) + self.assertIn(b'Raw content disabled.', output) if __name__ == '__main__': diff --git a/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py b/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py index 93c36dad0..007651d60 100755 --- a/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py +++ b/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py @@ -52,19 +52,17 @@ settings = 
{'warning_stream': ''} class CodeParsingTests(unittest.TestCase): def test_lexer_error(self): - output = publish_string(unknown_language, settings_overrides=settings, - auto_encode=False) - self.assertIn('<system_message level="2"', output) - self.assertIn('Cannot analyze code. ' - 'No Pygments lexer found for "s-lang".', output) - self.assertIn('<literal_block xml:space="preserve">', output) + output = publish_string(unknown_language, settings_overrides=settings) + self.assertIn(b'<system_message level="2"', output) + self.assertIn(b'Cannot analyze code. ' + b'No Pygments lexer found for "s-lang".', output) + self.assertIn(b'<literal_block xml:space="preserve">', output) def test_lexer_error_workaround(self): - output = publish_string(workaround, settings_overrides=settings, - auto_encode=False) - self.assertNotIn('<system_message', output) - self.assertIn('<literal_block classes="code s-lang"', output) - self.assertIn('autoload("abc_mode", "abc");', output) + output = publish_string(workaround, settings_overrides=settings) + self.assertNotIn(b'<system_message', output) + self.assertIn(b'<literal_block classes="code s-lang"', output) + self.assertIn(b'autoload("abc_mode", "abc");', output) if __name__ == '__main__': diff --git a/docutils/test/test_publisher.py b/docutils/test/test_publisher.py index 16c0e0e31..a9da547d3 100755 --- a/docutils/test/test_publisher.py +++ b/docutils/test/test_publisher.py @@ -111,10 +111,9 @@ class PublisherTests(unittest.TestCase): source = 'test → me' expected = ('<document source="<string>">\n' ' <paragraph>\n' - ' test → me\n') + ' test → me\n').encode('utf-8') output = core.publish_string(source.encode('utf-16'), - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) self.assertEqual(output, expected) # encoding declaration in source @@ -122,8 +121,7 @@ class PublisherTests(unittest.TestCase): # don't encode output (return `str`) settings['output_encoding'] = 'unicode' output = 
core.publish_string(source.encode('utf-16'), - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) self.assertTrue(output.endswith('Grüße\n')) def test_publish_string_output_encoding(self): @@ -134,24 +132,22 @@ class PublisherTests(unittest.TestCase): expected = ('<document source="<string>">\n' ' <paragraph>\n' ' Grüß → dich\n') - # current default: encode output, return `bytes` + # encode output, return `bytes` output = bytes(core.publish_string(source, settings_overrides=settings)) self.assertEqual(output, expected.encode('latin1', 'replace')) - # no encoding if `auto_encode` is False: - output = core.publish_string(source, settings_overrides=settings, - auto_encode=False) - self.assertEqual(output, expected) - self.assertEqual(output.encoding, 'latin1') def test_publish_string_output_encoding_odt(self): """The ODT writer generates a zip archive, not a `str`. TODO: return `str` with document as "flat XML" (.fodt). """ - with self.assertRaises(ValueError) as cm: - core.publish_string('test', writer_name='odt', auto_encode=False) - self.assertIn('expects `str` instance', str(cm.exception)) + settings = dict(self.settings) + settings['output_encoding'] = 'unicode' + with self.assertRaises(AssertionError) as cm: + core.publish_string('test', writer_name='odt', + settings_overrides=settings) + self.assertIn('`data` is no `str` instance', str(cm.exception)) class PublishDoctreeTestCase(unittest.TestCase, docutils.SettingsSpec): @@ -234,9 +230,8 @@ class PublishDoctreeTestCase(unittest.TestCase, docutils.SettingsSpec): # Write out the document: output = core.publish_from_doctree(doctree_zombie, writer_name='pseudoxml', - settings_spec=self, - auto_encode=False) - self.assertEqual(output, pseudoxml_output) + settings_spec=self) + self.assertEqual(output.decode(), pseudoxml_output) if __name__ == '__main__': diff --git a/docutils/test/test_writers/test_html4css1_misc.py b/docutils/test/test_writers/test_html4css1_misc.py index 
7fa664d26..62dc7659b 100755 --- a/docutils/test/test_writers/test_html4css1_misc.py +++ b/docutils/test/test_writers/test_html4css1_misc.py @@ -61,9 +61,8 @@ second term: second def """ result = core.publish_string(data, writer_name='html4css1', - settings_overrides=self.mys, - auto_encode=False) - self.assertIn('<dt class="for the second item">second term:</dt>', + settings_overrides=self.mys) + self.assertIn(b'<dt class="for the second item">second term:</dt>', result) def test_definition_list_item_name(self): @@ -79,9 +78,8 @@ second term: second def """ result = core.publish_string(data, writer_name='html4css1', - settings_overrides=self.mys, - auto_encode=False) - self.assertIn('<dt id="second-item">second term:</dt>', + settings_overrides=self.mys) + self.assertIn(b'<dt id="second-item">second term:</dt>', result) diff --git a/docutils/test/test_writers/test_html4css1_template.py b/docutils/test/test_writers/test_html4css1_template.py index 842255495..adc42038f 100755 --- a/docutils/test/test_writers/test_html4css1_template.py +++ b/docutils/test/test_writers/test_html4css1_template.py @@ -43,9 +43,7 @@ class WriterPublishTestCase(unittest.TestCase): 'template': template_path, 'stylesheet_path': '/test.css', 'embed_stylesheet': False, - }, - auto_encode=False, - ) + }).decode() self.assertEqual(output, case_expected) diff --git a/docutils/test/test_writers/test_html5_polyglot_misc.py b/docutils/test/test_writers/test_html5_polyglot_misc.py index d242f3d85..933f61d4c 100644 --- a/docutils/test/test_writers/test_html5_polyglot_misc.py +++ b/docutils/test/test_writers/test_html5_polyglot_misc.py @@ -67,8 +67,7 @@ second term: second def """ result = core.publish_string(data, writer_name='html5_polyglot', - settings_overrides=self.mys, - auto_encode=False) + settings_overrides=self.mys).decode() self.assertIn('<dt class="for the second item">second term:</dt>', result) @@ -85,8 +84,7 @@ second term: second def """ result = core.publish_string(data, 
writer_name='html5_polyglot', - settings_overrides=self.mys, - auto_encode=False) + settings_overrides=self.mys).decode() self.assertIn('<dt id="second-item">second term:</dt>', result) diff --git a/docutils/test/test_writers/test_html5_template.py b/docutils/test/test_writers/test_html5_template.py index 2b71e2d87..3f30b45b5 100644 --- a/docutils/test/test_writers/test_html5_template.py +++ b/docutils/test/test_writers/test_html5_template.py @@ -43,9 +43,7 @@ class WriterPublishTestCase(unittest.TestCase): 'template': template_path, 'stylesheet_path': '/test.css', 'embed_stylesheet': False, - }, - auto_encode=False, - ) + }).decode() self.assertEqual(output, case_expected) diff --git a/docutils/test/test_writers/test_latex2e.py b/docutils/test/test_writers/test_latex2e.py index 3ae4cd15c..d4ba94b3d 100755 --- a/docutils/test/test_writers/test_latex2e.py +++ b/docutils/test/test_writers/test_latex2e.py @@ -44,8 +44,8 @@ class WriterPublishTestCase(unittest.TestCase): with self.subTest(id=f'samples_default[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=self.settings, - auto_encode=False) + settings_overrides=self.settings) + output = output.decode() self.assertEqual(output, expected) def test_docutils_toc(self): @@ -56,8 +56,8 @@ class WriterPublishTestCase(unittest.TestCase): with self.subTest(id=f'samples_docutils_toc[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) def test_book(self): @@ -68,8 +68,8 @@ class WriterPublishTestCase(unittest.TestCase): with self.subTest(id=f'samples_book[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() 
self.assertEqual(output, expected) def test_latex_sectnum(self): @@ -82,8 +82,8 @@ class WriterPublishTestCase(unittest.TestCase): id=f'samples_latex_sectnum[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) def test_latex_citations(self): @@ -95,8 +95,8 @@ class WriterPublishTestCase(unittest.TestCase): id=f'samples_latex_citations[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) def test_table_style_auto(self): @@ -108,8 +108,8 @@ class WriterPublishTestCase(unittest.TestCase): id=f'samples_table_style_auto[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) def test_booktabs(self): @@ -120,8 +120,8 @@ class WriterPublishTestCase(unittest.TestCase): with self.subTest(id=f'samples_booktabs[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) def test_link_stylesheet(self): @@ -132,8 +132,8 @@ class WriterPublishTestCase(unittest.TestCase): with self.subTest(id=f'samples_link_stylesheet[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) def test_embed_embed_stylesheet(self): @@ -146,8 +146,8 @@ class 
WriterPublishTestCase(unittest.TestCase): with self.subTest(id=f'samples_embed_stylesheet[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) def test_bibtex(self): @@ -158,8 +158,8 @@ class WriterPublishTestCase(unittest.TestCase): with self.subTest(id=f'samples_bibtex[{name!r}][{casenum}]'): output = publish_string(source=rst_input, writer_name=self.writer_name, - settings_overrides=settings, - auto_encode=False) + settings_overrides=settings) + output = output.decode() self.assertEqual(output, expected) diff --git a/docutils/test/test_writers/test_manpage.py b/docutils/test/test_writers/test_manpage.py index 2774d3345..4cbb426e6 100644 --- a/docutils/test/test_writers/test_manpage.py +++ b/docutils/test/test_writers/test_manpage.py @@ -31,9 +31,7 @@ class WriterPublishTestCase(unittest.TestCase): settings_overrides={ '_disable_config': True, 'strict_visitor': True, - }, - auto_encode=False, - ) + }).decode() self.assertEqual(output, case_expected) diff --git a/docutils/test/test_writers/test_pseudoxml.py b/docutils/test/test_writers/test_pseudoxml.py index 9ef309228..58b504027 100755 --- a/docutils/test/test_writers/test_pseudoxml.py +++ b/docutils/test/test_writers/test_pseudoxml.py @@ -35,9 +35,7 @@ class WriterPublishTestCase(unittest.TestCase): settings_overrides={ '_disable_config': True, 'strict_visitor': True, - }, - auto_encode=False, - ) + }).decode() self.assertEqual(output, case_expected) for name, cases in totest_detailed.items(): @@ -50,9 +48,7 @@ class WriterPublishTestCase(unittest.TestCase): '_disable_config': True, 'strict_visitor': True, 'detailed': True, - }, - auto_encode=False, - ) + }).decode() self.assertEqual(output, case_expected) diff --git a/docutils/test/test_writers/test_s5.py b/docutils/test/test_writers/test_s5.py index 9d7432d7e..3c1364f84 
100755 --- a/docutils/test/test_writers/test_s5.py +++ b/docutils/test/test_writers/test_s5.py @@ -38,9 +38,8 @@ class WriterPublishTestCase(unittest.TestCase): output = publish_string( source=case_input, writer_name=writer_name, - settings_overrides=settings.copy(), - auto_encode=False, - ) + settings_overrides=settings.copy() + ).decode() self.assertEqual(output, case_expected) settings['hidden_controls'] = False @@ -51,9 +50,8 @@ class WriterPublishTestCase(unittest.TestCase): output = publish_string( source=case_input, writer_name=writer_name, - settings_overrides=settings.copy(), - auto_encode=False, - ) + settings_overrides=settings.copy() + ).decode() self.assertEqual(output, case_expected) |