summaryrefslogtreecommitdiff
path: root/sandbox/xml2rst/xml2rstlib/markup.py
blob: 880bcfd790f2e9522ced6d30f8cbc3d69d493555 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Copyright (C) 2011-2013 Stefan Merten

# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 2 of the License,
# or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.

"""
Python based conversion.
"""

###############################################################################
###############################################################################
# Import

import re
import docutils
import docutils.parsers.rst.states as states

__docformat__ = 'reStructuredText'

###############################################################################
###############################################################################
# Classes

class Inliner(states.Inliner):
    """
    Recognizer for inline markup. Derive this from the original inline
    markup parser for best results.
    """

    # Copy static attributes from super class
    # NOTE(review): the class already inherits from `states.Inliner`, and
    # this update also copies the parent's `__doc__` and `__module__` over
    # the ones of this class. Kept unchanged - confirm whether it is still
    # needed.
    vars().update(vars(states.Inliner))

    def quoteInline(self, text):
        """
        Return `text` with inline markup quoted.

        The text is scanned with the inline markup patterns and dispatch
        methods inherited from `states.Inliner`. Every piece of input
        consumed as an inline construct gets a backslash inserted so it
        is no longer taken as markup, and every backslash already
        contained in `text` is doubled.

        :Parameters:

          text : str
            The plain text to protect from interpretation as inline
            markup.

        :Return: The quoted text as a ``str``.
        """
        # Method inspired by `states.Inliner.parse`
        self.document = docutils.utils.new_document("<string>")
        settings = self.document.settings
        settings.trim_footnote_reference_space = False
        settings.character_level_inline_markup = False
        settings.pep_references = False
        settings.rfc_references = False

        self.init_customizations(settings)

        self.reporter = self.document.reporter
        # Presumably silences messages of the throw-away document
        self.reporter.stream = None
        self.language = None
        self.parent = self.document

        # Loop invariant patterns are compiled once
        rolePfxRe = re.compile("^:" + self.simplename + ":(?=`)")
        refSfxRe = re.compile("_+$")

        # Backslash escapes are turned to null characters for scanning
        remaining = docutils.utils.escape2null(text)
        pieces = []
        while remaining:
            original = remaining
            match = self.patterns.initial.search(remaining)
            if not match:
                # No further candidate for inline markup
                pieces.append(remaining)
                break
            groups = match.groupdict()
            method = self.dispatch[groups['start'] or groups['backquote']
                                   or groups['refend'] or groups['fnend']]
            # `dispatch` holds plain functions so `self` is passed
            # explicitly; the system messages are of no interest here
            ( before, inlines, remaining, _sysmessages,
              ) = method(self, match, 0)
            pieces.append(before)
            if not inlines:
                continue
            # Only the consumed source span is needed for quoting, so the
            # number of nodes returned does not matter.
            # NOTE(review): a former ``assert len(inlines) == 1`` was
            # dropped here; docutils appears to return two nodes
            # (reference and target) for a reference with an embedded URI
            # which made the assertion fail - confirm against the docutils
            # version in use.
            inline = original[len(before):len(original) - len(remaining)]
            rolePfx = rolePfxRe.search(inline)
            refSfx = refSfxRe.search(inline)
            if rolePfx:
                # Prefixed roles need to be quoted in the middle
                pieces.append(inline[:rolePfx.end()] + "\\"
                              + inline[rolePfx.end():])
            elif refSfx and not inline.startswith("`"):
                # Pure reference markup needs to be quoted at the end
                pieces.append(inline[:refSfx.start()] + "\\"
                              + inline[refSfx.start():])
            else:
                # Quote other inlines by prefixing
                pieces.append("\\" + inline)
        # Quote all original backslashes: each null character standing for
        # an original backslash gets a backslash in front before
        # `unescape` turns the null characters back to backslashes
        checked = "".join(pieces).replace("\x00", "\\\x00")
        return docutils.utils.unescape(checked, 1)

###############################################################################

class Text:
    """
    Helpers producing plain text which is valid in reStructuredText.
    """

    # Shared recognizer used to quote inline markup
    inliner = Inliner()

    @staticmethod
    def plain(text, indent, literal):
        """
        Turn `text` into output text which is not interpreted further by
        reStructuredText. `text` may span several lines.

        :Parameters:

          text : str
            The string to turn into output text.

          indent : str
            The string inserted after every linefeed contained in
            `text`.

          literal : bool
            If true `text` is output as is, otherwise inline markup is
            quoted.

        :Return: The resulting text as a ``str``.
        """
        if not literal:
            text = Text.inliner.quoteInline(text)
        # Every internal linefeed is followed by the indent
        return text.replace("\n", "\n" + indent)

#print(Text.plain("Some \\ back\slashes", "", False))

# indent
# directive
# field_names
# substitution
# inline markup
# token
# label
# start_delimiter
# end_delimiter
# target_definition