sandbox/html4strict/xhtml11/__init__.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

# .. coding: utf8
# :Author: Günter Milde <milde@users.berlios.de>
# :Revision: $Revision$
# :Date: $Date: 2005-06-28$
# :Copyright: © 2005, 2009 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause

"""
Strict eXtensible HyperText Markup Language (XHTML) document Writer.

This is a variant of Docutils' `html-base` writer.
The output conforms to the XHTML version 1.1 DTD.
"""

__docformat__ = 'reStructuredText'

import os
import os.path
import re

import docutils
from docutils import frontend, nodes, utils, writers, languages
from docutils.writers import html_base

class Writer(html_base.Writer):
    """
    Docutils Writer producing XHTML 1.1 documents.

    Extends `html_base.Writer` with XHTML 1.1 specific stylesheet
    defaults, its own configuration section, replacements for a few
    settings, and the `HTMLTranslator` class defined in this module.
    """

    supported = ('html', 'html4', 'html4strict', 'html4css2',
                 'xhtml', 'xhtml1', 'xhtml1strict', 'xhtml11')
    """Formats this writer supports."""

    default_stylesheets = ['minimal.css', 'plain.css', 'xhtml11.css']
    # Default stylesheet directories: the current directory, the directory
    # of this module, and the sibling "html_base" directory.
    default_stylesheet_dirs = ['.',
        os.path.abspath(os.path.dirname(__file__)),
        os.path.abspath(os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'html_base'))
       ]

    config_section = 'xhtml11 writer'
    config_section_dependencies = ('writers', 'html writer')

    # Start from the base writer's settings and hand over replacement
    # specs (help text, option strings, option keywords) as keyword
    # arguments named after the setting they stand in for.
    settings_spec = frontend.filter_settings_spec(
        html_base.Writer.settings_spec,
        stylesheet_path = (
          'Comma separated list of stylesheet paths. '
          'Relative paths are expanded if a matching file is found in '
          'the --stylesheet-dirs. With --link-stylesheet, '
          'the path is rewritten relative to the output HTML file. '
          'Default: "%s"' % ','.join(default_stylesheets),
          ['--stylesheet-path'],
          {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet',
           'validator': frontend.validate_comma_separated_list,
           'default': default_stylesheets}),
        stylesheet_dirs = (
          'Comma-separated list of directories where stylesheets are found. '
          'Used by --stylesheet-path when expanding relative path arguments. '
          # Show the default in the comma-separated form the option
          # accepts (as --stylesheet-path does), not as a list repr.
          'Default: "%s"' % ','.join(default_stylesheet_dirs),
          ['--stylesheet-dirs'],
          {'metavar': '<dir[,dir,...]>',
           'validator': frontend.validate_comma_separated_list,
           'default': default_stylesheet_dirs}),
        math_output = ('Math output format, one of "MathML", "HTML", '
            '"MathJax" or "LaTeX". Default: "MathML"',
            ['--math-output'],
            {'default': 'MathML'}),
        xml_declaration = ('Prepend an XML declaration. '
          'Default: True',
          ['--xml-declaration'],
          {'default': True, 'action': 'store_true',
           'validator': frontend.validate_boolean}))

    def __init__(self):
        """Initialise the writer and select the XHTML 1.1 translator."""
        # NOTE(review): this calls `writers.Writer.__init__` directly, so
        # any set-up done in `html_base.Writer.__init__` is skipped --
        # presumably intentional; confirm against html_base.
        writers.Writer.__init__(self)
        self.translator_class = HTMLTranslator


class HTMLTranslator(html_base.HTMLTranslator):
    """
    Translator for XHTML 1.1 output.

    Overrides parts of `html_base.HTMLTranslator`; presentation is
    left to a CSS stylesheet.
    """
    doctype = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
               '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
    doctype_mathml = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" '
        '"http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd">\n')

# XHTML 1.1 has no "lang" attribute; "xml:lang" is used instead
# (changed from 'lang' in XHTML 1.0).

    lang_attribute = 'xml:lang'
    head_prefix_template = ('<html xmlns="http://www.w3.org/1999/xhtml"'
                            ' xml:lang="%(lang)s">\n<head>\n')


# enumerated lists
# ----------------
# The 'start' attribute does not conform to HTML4/XHTML1 Strict
# (resurfaced in HTML5), so a CSS counter is reset instead.

    def visit_enumerated_list(self, node):
        """Append the opening ``<ol>`` tag for `node` to the body.

        A "start" value becomes an inline ``counter-reset`` style; the
        "enumtype" value and, for compactable lists, "simple" are added
        to the node's classes.
        """
        attributes = {}
        if 'start' in node:
            # The counter is reset to one below the first item's number.
            reset_value = node['start'] - 1
            attributes['style'] = 'counter-reset: item %d;' % reset_value
        class_names = node.setdefault('classes', [])
        if 'enumtype' in node:
            class_names.append(node['enumtype'])
        if self.is_compactable(node):
            class_names.append('simple')
        opening = self.starttag(node, 'ol', **attributes)
        self.body.append(opening)


# <sup> and <sub> tags (possible with parsed-literal) are not allowed
# in <pre> --- use <span> ::

    def visit_subscript(self, node):
        """Open ``<sub>``, or a "subscript" ``<span>`` in a literal block."""
        inside_literal = isinstance(node.parent, nodes.literal_block)
        if inside_literal:
            opening = self.starttag(node, 'span', '', CLASS='subscript')
        else:
            opening = self.starttag(node, 'sub', '')
        self.body.append(opening)

    def depart_subscript(self, node):
        """Close the element opened by `visit_subscript`."""
        inside_literal = isinstance(node.parent, nodes.literal_block)
        closing = '</span>' if inside_literal else '</sub>'
        self.body.append(closing)


    def visit_superscript(self, node):
        """Open ``<sup>``, or a "superscript" ``<span>`` in a literal block."""
        # <sup> is not allowed in <pre>
        inside_literal = isinstance(node.parent, nodes.literal_block)
        if inside_literal:
            opening = self.starttag(node, 'span', '', CLASS='superscript')
        else:
            opening = self.starttag(node, 'sup', '')
        self.body.append(opening)

    def depart_superscript(self, node):
        """Close the element opened by `visit_superscript`."""
        inside_literal = isinstance(node.parent, nodes.literal_block)
        closing = '</span>' if inside_literal else '</sup>'
        self.body.append(closing)

# Wrap inline MathML in <span>, as it is not allowed directly in a <pre> block
# (possible with parsed-literal)::

    math_tags = {
        # math_output: (block, inline, class-arguments)
        'mathml': ('div', 'span', ''),
        'html': ('div', 'span', 'formula'),
        'mathjax': ('div', 'span', 'math'),
        'latex': ('pre', 'tt', 'math'),
    }

# Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
# HTML5/polyglot recommends using both

    def visit_meta(self, node):
        """Emit a ``<meta>`` tag, renaming a "lang" attribute to "xml:lang".

        The rename is applied to `node` itself before its non-default
        attributes are turned into the tag.
        """
        if node.hasattr('lang'):
            node['xml:lang'] = node['lang']
            del node['lang']
        attributes = node.non_default_attributes()
        meta_tag = self.emptytag(node, 'meta', **attributes)
        self.add_meta(meta_tag)