summary refs log tree commit diff
path: root/sandbox/jensj/latex_math/tools/rst2mathml.py
diff options
context:
space:
mode:
Diffstat (limited to 'sandbox/jensj/latex_math/tools/rst2mathml.py')
-rw-r--r-- sandbox/jensj/latex_math/tools/rst2mathml.py 522
1 files changed, 0 insertions, 522 deletions
diff --git a/sandbox/jensj/latex_math/tools/rst2mathml.py b/sandbox/jensj/latex_math/tools/rst2mathml.py
deleted file mode 100644
index b7b1aa0fa..000000000
--- a/sandbox/jensj/latex_math/tools/rst2mathml.py
+++ /dev/null
@@ -1,522 +0,0 @@
-#!/usr/bin/env python
-
-"""
-A minimal front end to the Docutils Publisher, producing HTML + MathML.
-"""
-
-# Best-effort: switch to the user's locale settings.
-try:
-    import locale
-    locale.setlocale(locale.LC_ALL, '')
-except Exception:
-    # A missing locale module or an unsupported locale must not stop the
-    # front end.  "except Exception" (rather than a bare "except") keeps
-    # KeyboardInterrupt and SystemExit from being swallowed here.
-    pass
-
-from docutils.parsers.rst.roles import register_canonical_role
-from docutils import nodes
-from docutils.writers.html4css1 import HTMLTranslator
-from docutils.parsers.rst.directives import _directives
-from docutils.core import publish_cmdline, default_description
-
-
-# Docutils node type that carries one parsed formula:
-class latex_math(nodes.Element):
-    """Document-tree element holding the MathML tree of a formula."""
-
-    tagname = '#latex-math'
-
-    def __init__(self, rawsource, mathml_tree):
-        # Only the raw source is handed to Element; the parsed tree is
-        # stored as a plain attribute, which visit_latex_math reads.
-        nodes.Element.__init__(self, rawsource)
-        self.mathml_tree = mathml_tree
-
-# Register role:
-def latex_math_role(role, rawtext, text, lineno, inliner,
-                    options={}, content=[]):
-    """Role function for the 'latex-math' interpreted-text role.
-
-    Returns the pair (nodes, system messages): one latex_math node and no
-    messages on success, or a problematic node plus the error message
-    when parse_latex_math raises SyntaxError."""
-
-    # The formula is cut out of rawtext: everything after the first
-    # backquote, without the final character (the closing backquote).
-    start = rawtext.find('`') + 1
-    latex = rawtext[start:-1]
-    try:
-        tree = parse_latex_math(latex, inline=True)
-    except SyntaxError, err:
-        system_message = inliner.reporter.error(err, line=lineno)
-        problematic = inliner.problematic(rawtext, rawtext, system_message)
-        return [problematic], [system_message]
-    return [latex_math(rawtext, tree)], []
-register_canonical_role('latex-math', latex_math_role)
-
-
-# Register directive:
-def latex_math_directive(name, arguments, options, content, lineno,
-                         content_offset, block_text, state, state_machine):
-    """Directive function for the 'latex-math' directive.
-
-    Parses the directive content as displayed (non-inline) math and
-    returns a list with one latex_math node, or a list with one error
-    message when parse_latex_math raises SyntaxError."""
-
-    # Join the content lines with newlines, not with the empty string:
-    # otherwise the end of one line fuses with the start of the next
-    # (a line ending in "\alpha" followed by "x" would become the unknown
-    # command "\alphax").  parse_latex_math collapses all white-space to
-    # single blanks, so the newlines never reach the tokenizer.
-    latex = '\n'.join(content)
-    try:
-        mathml_tree = parse_latex_math(latex, inline=False)
-    except SyntaxError, msg:
-        error = state_machine.reporter.error(
-            msg, nodes.literal_block(block_text, block_text), line=lineno)
-        return [error]
-    node = latex_math(block_text, mathml_tree)
-    return [node]
-latex_math_directive.arguments = None
-latex_math_directive.options = {}
-latex_math_directive.content = 1
-_directives['latex-math'] = latex_math_directive
-
-
-# Add visit/depart methods to HTML-Translator:
-def visit_latex_math(self, node):
-    """Append the node's formula to self.body as a <math> element.
-
-    A <br/> follows when the node's parent is not a TextElement.  The
-    first call on a translator also overwrites head_prefix[1] (when
-    settings.xml_declaration is true) or head_prefix[0] with the
-    XHTML 1.1 plus MathML 2.0 doctype."""
-
-    markup = """<math xmlns="http://www.w3.org/1998/Math/MathML">
- <semantics>
- %s
- </semantics>
- </math>
- """ % ''.join(node.mathml_tree.xml())
-    if not isinstance(node.parent, nodes.TextElement):
-        # Not inline: finish the formula with a line break.
-        markup += '<br/>\n'
-    self.body.append(markup)
-    if self.has_mathml_dtd:
-        return
-    doctype = ('<!DOCTYPE html'
-               ' PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"'
-               ' "http://www.w3.org/Math/DTD/mathml2/'
-               'xhtml-math11-f.dtd">\n')
-    # Index 1 when settings.xml_declaration is true, index 0 otherwise.
-    self.head_prefix[int(bool(self.settings.xml_declaration))] = doctype
-    self.has_mathml_dtd = True
-
-
-def depart_latex_math(self, node):
-    """Nothing to do: visit_latex_math writes the complete element."""
-    pass
-
-
-# Patch the translator class; has_mathml_dtd starts out False on the class
-# and is set to True per instance by visit_latex_math.
-HTMLTranslator.visit_latex_math = visit_latex_math
-HTMLTranslator.depart_latex_math = depart_latex_math
-HTMLTranslator.has_mathml_dtd = False
-
-
-# LaTeX to MathML translation stuff:
-class math:
-    """Base class for MathML elements.
-
-    An element keeps its children in self.children and, once it has been
-    appended to another element, a reference to that element in
-    self.parent.  The class name doubles as the XML tag name."""
-
-    # Number of children at which the element counts as full.  The default
-    # is large enough to act as "no limit"; subclasses with a fixed number
-    # of children override it.
-    nchildren = 1000000
-
-    def __init__(self, children=None):
-        """math([children]) -> MathML element
-
-        children can be one child or a list (or tuple) of children."""
-
-        self.children = []
-        if children is None:
-            return
-        # isinstance instead of an exact type test, so that list
-        # subclasses and tuples are unpacked child by child rather than
-        # being appended as a single, unusable child.
-        if isinstance(children, (list, tuple)):
-            for child in children:
-                self.append(child)
-        else:
-            # Only one child:
-            self.append(children)
-
-    def __repr__(self):
-        # Subclasses that do not call math.__init__ have no children list.
-        if hasattr(self, 'children'):
-            return self.__class__.__name__ + '(%s)' % \
-                   ','.join([repr(child) for child in self.children])
-        else:
-            return self.__class__.__name__
-
-    def full(self):
-        """Return True when there is no room for more children."""
-
-        return len(self.children) >= self.nchildren
-
-    def append(self, child):
-        """append(child) -> element
-
-        Appends child and returns self if self is still not full,
-        otherwise the first non-full ancestor.  self must not be full
-        when this is called."""
-
-        assert not self.full()
-        self.children.append(child)
-        child.parent = self
-        node = self
-        while node.full():
-            node = node.parent
-        return node
-
-    def delete_child(self):
-        """delete_child() -> child
-
-        Delete last child and return it.  Raises IndexError when there
-        are no children."""
-
-        return self.children.pop()
-
-    def close(self):
-        """close() -> parent
-
-        Close element and return the first non-full ancestor.  Raises
-        AttributeError when the element has never been appended to a
-        parent."""
-
-        parent = self.parent
-        while parent.full():
-            parent = parent.parent
-        return parent
-
-    def xml(self):
-        """xml() -> list of strings
-
-        The fragments of the element's XML text, in order: start tag,
-        body, end tag.  Callers join them with the empty string."""
-
-        return self.xml_start() + self.xml_body() + self.xml_end()
-
-    def xml_start(self):
-        """Return the start tag as a one-element list."""
-        return ['<%s>' % self.__class__.__name__]
-
-    def xml_end(self):
-        """Return the end tag as a one-element list."""
-        return ['</%s>' % self.__class__.__name__]
-
-    def xml_body(self):
-        """Return the concatenated xml() fragments of all children."""
-        xml = []
-        for child in self.children:
-            xml.extend(child.xml())
-        return xml
-
-class mrow(math):
-    """<mrow> element; inherits everything from math."""
-
-
-class mtable(math):
-    """<mtable> element; inherits everything from math."""
-
-
-class mtr(mrow):
-    """<mtr> element (one table row); inherits everything from mrow."""
-
-
-class mtd(mrow):
-    """<mtd> element (one table cell); inherits everything from mrow."""
-
-class mx(math):
-    """Base class for mo, mi, and mn
-
-    These elements hold a piece of character data instead of children;
-    math.__init__ is not called, so instances have no children list."""
-
-    nchildren = 0
-
-    def __init__(self, data):
-        self.data = data
-
-    def xml_body(self):
-        """Return the character data as a one-element list."""
-        return [self.data]
-
-
-class mo(mx):
-    """<mo> element; '<' and '>' are written as XML entities."""
-
-    translation = {'<': '&lt;', '>': '&gt;'}
-
-    def xml_body(self):
-        """Return the data, replaced by its entity if it has one."""
-        try:
-            return [self.translation[self.data]]
-        except KeyError:
-            return [self.data]
-
-
-class mi(mx):
-    """<mi> element; inherits everything from mx."""
-
-
-class mn(mx):
-    """<mn> element; inherits everything from mx."""
-
-class msub(math):
-    """<msub> element; full after two children."""
-    nchildren = 2
-
-
-class msup(math):
-    """<msup> element; full after two children."""
-    nchildren = 2
-
-
-class msqrt(math):
-    """<msqrt> element; full after one child."""
-    nchildren = 1
-
-
-class mroot(math):
-    """<mroot> element; full after two children."""
-    nchildren = 2
-
-
-class mfrac(math):
-    """<mfrac> element; full after two children."""
-    nchildren = 2
-
-class msubsup(math):
-    """<msubsup> element; full after three children.
-
-    With reversed=True the second and third child are exchanged the first
-    time xml() is called (the parser passes it when the superscript was
-    read before the subscript)."""
-
-    nchildren = 3
-
-    def __init__(self, children=None, reversed=False):
-        # The flag has to exist before math.__init__ appends children.
-        self.reversed = reversed
-        math.__init__(self, children)
-
-    def xml(self):
-        """Return the XML fragments, swapping children 1 and 2 once."""
-        if self.reversed:
-            second, third = self.children[1], self.children[2]
-            self.children[1:3] = [third, second]
-            # Clear the flag so that a later call does not swap back.
-            self.reversed = False
-        return math.xml(self)
-
-class mfenced(math):
-    """<mfenced> element with configurable opening and closing fence.
-
-    openpar is given to the constructor; closepar is assigned from the
-    outside (handle_keyword does it when it reads the "right" command).
-    Fence names found in translation are replaced by the mapped
-    character, everything else is written as is."""
-
-    translation = {'\\{': '{', '\\langle': u'\u2329',
-                   '\\}': '}', '\\rangle': u'\u232A',
-                   '.': ''}
-
-    def __init__(self, par):
-        self.openpar = par
-        # Default until a closing fence is assigned: '.', which translates
-        # to an empty close attribute.  Without it a formula with an
-        # unmatched opening fence failed with AttributeError in
-        # xml_start().
-        self.closepar = '.'
-        math.__init__(self)
-
-    def xml_start(self):
-        """Return the start tag with open/close attributes as a list."""
-        opening = self.translation.get(self.openpar, self.openpar)
-        closing = self.translation.get(self.closepar, self.closepar)
-        return ['<mfenced open="%s" close="%s">' % (opening, closing)]
-
-class mspace(math):
-    """<mspace> element; takes no children."""
-
-    nchildren = 0
-
-class mstyle(math):
-    """<mstyle> element; keyword arguments become XML attributes.
-
-    nchildren, when given, overrides the class-level child limit for
-    this instance."""
-
-    def __init__(self, children=None, nchildren=None, **kwargs):
-        if nchildren is not None:
-            self.nchildren = nchildren
-        math.__init__(self, children)
-        self.attrs = kwargs
-
-    def xml_start(self):
-        """Return the start tag, including the attributes, as a list."""
-        # Separate the attributes with blanks; the fragments are later
-        # joined with the empty string, which used to glue several
-        # attributes together into malformed XML.
-        attributes = ['%s="%s"' % item for item in self.attrs.items()]
-        return ['<mstyle ', ' '.join(attributes), '>']
-
-class mover(math):
-    """<mover> element; full after two children.
-
-    The children are collected in the opposite order from the one that
-    is written out: the list is reversed on the first xml() call."""
-
-    nchildren = 2
-    reversed = True
-
-    def xml(self):
-        """Return the XML fragments, reversing the children once."""
-        if not self.reversed:
-            return math.xml(self)
-        self.children.reverse()
-        # Instance attribute shadows the class default from now on.
-        self.reversed = False
-        return math.xml(self)
-
-class mtext(math):
-    """<mtext> element holding literal text; takes no children."""
-
-    nchildren = 0
-
-    def __init__(self, text):
-        self.text = text
-
-    def xml_body(self):
-        """Return the text, XML-escaped, as a one-element list."""
-        # The text is copied verbatim from the formula source, so it may
-        # contain characters that are markup in XML.  '&' has to be
-        # replaced first, otherwise the entities produced for '<' and '>'
-        # would be escaped a second time.
-        escaped = self.text.replace('&', '&amp;')
-        escaped = escaped.replace('<', '&lt;').replace('>', '&gt;')
-        return [escaped]
-
-
-# Accent commands (without the backslash) and the character that
-# handle_keyword wraps in <mo> as first child of an <mover>:
-over = {
-    'tilde': '~',
-    'hat': '^',
-    'bar': '_',
-    'vec': u'\u20D7',
-}
-
-# LaTeX command names (without the backslash) of the upper-case Greek
-# letters, mapped to the Unicode character that handle_keyword puts into
-# an <mo> element.
-Greek = {
- # Upper-case Greek letters:
- 'Phi': u'\u03a6', 'Xi': u'\u039e', 'Sigma': u'\u03a3', 'Psi': u'\u03a8', 'Delta': u'\u0394', 'Theta': u'\u0398', 'Upsilon': u'\u03d2', 'Pi': u'\u03a0', 'Omega': u'\u03a9', 'Gamma': u'\u0393', 'Lambda': u'\u039b'}
-# LaTeX command names (without the backslash) of the lower-case Greek
-# letters and their "var" variants, mapped to the Unicode character that
-# handle_keyword puts into an <mi> element.
-greek = {
- # Lower-case Greek letters:
- 'tau': u'\u03c4', 'phi': u'\u03d5', 'xi': u'\u03be', 'iota': u'\u03b9', 'epsilon': u'\u03f5', 'varrho': u'\u03f1', 'varsigma': u'\u03c2', 'beta': u'\u03b2', 'psi': u'\u03c8', 'rho': u'\u03c1', 'delta': u'\u03b4', 'alpha': u'\u03b1', 'zeta': u'\u03b6', 'omega': u'\u03c9', 'varepsilon': u'\u03b5', 'kappa': u'\u03ba', 'vartheta': u'\u03d1', 'chi': u'\u03c7', 'upsilon': u'\u03c5', 'sigma': u'\u03c3', 'varphi': u'\u03c6', 'varpi': u'\u03d6', 'mu': u'\u03bc', 'eta': u'\u03b7', 'theta': u'\u03b8', 'pi': u'\u03c0', 'varkappa': u'\u03f0', 'nu': u'\u03bd', 'gamma': u'\u03b3', 'lambda': u'\u03bb'}
-
-# LaTeX command names (without the backslash) of symbols, mapped to the
-# Unicode character that handle_keyword puts into an <mo> element.
-special = {
- # Binary operation symbols:
- 'wedge': u'\u2227', 'diamond': u'\u22c4', 'star': u'\u22c6', 'amalg': u'\u2a3f', 'ast': u'\u2217', 'odot': u'\u2299', 'triangleleft': u'\u25c1', 'bigtriangleup': u'\u25b3', 'ominus': u'\u2296', 'ddagger': u'\u2021', 'wr': u'\u2240', 'otimes': u'\u2297', 'sqcup': u'\u2294', 'oplus': u'\u2295', 'bigcirc': u'\u25cb', 'oslash': u'\u2298', 'sqcap': u'\u2293', 'bullet': u'\u2219', 'cup': u'\u222a', 'cdot': u'\u22c5', 'cap': u'\u2229', 'bigtriangledown': u'\u25bd', 'times': u'\xd7', 'setminus': u'\u2216', 'circ': u'\u2218', 'vee': u'\u2228', 'uplus': u'\u228e', 'mp': u'\u2213', 'dagger': u'\u2020', 'triangleright': u'\u25b7', 'div': u'\xf7', 'pm': u'\xb1',
- # Relation symbols:
- 'subset': u'\u2282', 'propto': u'\u221d', 'geq': u'\u2265', 'ge': u'\u2265', 'sqsubset': u'\u228f', 'Join': u'\u2a1d', 'frown': u'\u2322', 'models': u'\u22a7', 'supset': u'\u2283', 'in': u'\u2208', 'doteq': u'\u2250', 'dashv': u'\u22a3', 'gg': u'\u226b', 'leq': u'\u2264', 'succ': u'\u227b', 'vdash': u'\u22a2', 'cong': u'\u2245', 'simeq': u'\u2243', 'subseteq': u'\u2286', 'parallel': u'\u2225', 'equiv': u'\u2261', 'ni': u'\u220b', 'le': u'\u2264', 'approx': u'\u2248', 'precsim': u'\u227e', 'sqsupset': u'\u2290', 'll': u'\u226a', 'sqsupseteq': u'\u2292', 'mid': u'\u2223', 'prec': u'\u227a', 'succsim': u'\u227f', 'bowtie': u'\u22c8', 'perp': u'\u27c2', 'sqsubseteq': u'\u2291', 'asymp': u'\u224d', 'smile': u'\u2323', 'supseteq': u'\u2287', 'sim': u'\u223c', 'neq': u'\u2260',
- # Arrow symbols:
- 'searrow': u'\u2198', 'updownarrow': u'\u2195', 'Uparrow': u'\u21d1', 'longleftrightarrow': u'\u27f7', 'Leftarrow': u'\u21d0', 'longmapsto': u'\u27fc', 'Longleftarrow': u'\u27f8', 'nearrow': u'\u2197', 'hookleftarrow': u'\u21a9', 'downarrow': u'\u2193', 'Leftrightarrow': u'\u21d4', 'longrightarrow': u'\u27f6', 'rightharpoondown': u'\u21c1', 'longleftarrow': u'\u27f5', 'rightarrow': u'\u2192', 'Updownarrow': u'\u21d5', 'rightharpoonup': u'\u21c0', 'Longleftrightarrow': u'\u27fa', 'leftarrow': u'\u2190', 'mapsto': u'\u21a6', 'nwarrow': u'\u2196', 'uparrow': u'\u2191', 'leftharpoonup': u'\u21bc', 'leftharpoondown': u'\u21bd', 'Downarrow': u'\u21d3', 'leftrightarrow': u'\u2194', 'Longrightarrow': u'\u27f9', 'swarrow': u'\u2199', 'hookrightarrow': u'\u21aa', 'Rightarrow': u'\u21d2',
- # Miscellaneous symbols:
- 'infty': u'\u221e', 'surd': u'\u221a', 'partial': u'\u2202', 'ddots': u'\u22f1', 'exists': u'\u2203', 'flat': u'\u266d', 'diamondsuit': u'\u2662', 'wp': u'\u2118', 'spadesuit': u'\u2660', 'Re': u'\u211c', 'vdots': u'\u22ee', 'aleph': u'\u2135', 'clubsuit': u'\u2663', 'sharp': u'\u266f', 'angle': u'\u2220', 'prime': u'\u2032', 'natural': u'\u266e', 'ell': u'\u2113', 'neg': u'\xac', 'top': u'\u22a4', 'nabla': u'\u2207', 'bot': u'\u22a5', 'heartsuit': u'\u2661', 'cdots': u'\u22ef', 'Im': u'\u2111', 'forall': u'\u2200', 'imath': u'\u0131', 'hbar': u'\u210f', 'emptyset': u'\u2205',
- # Variable-sized symbols:
- 'bigotimes': u'\u2a02', 'coprod': u'\u2210', 'int': u'\u222b', 'sum': u'\u2211', 'bigodot': u'\u2a00', 'bigcup': u'\u22c3', 'biguplus': u'\u2a04', 'bigcap': u'\u22c2', 'bigoplus': u'\u2a01', 'oint': u'\u222e', 'bigvee': u'\u22c1', 'bigwedge': u'\u22c0', 'prod': u'\u220f',
- # Braces:
- 'langle': u'\u2329', 'rangle': u'\u232A'}
-
-# Command names (without the backslash) that handle_keyword writes out
-# unchanged as the content of an <mo> element:
-functions = ('arccos arcsin arctan arg cos cosh '
-             'cot coth csc deg det dim '
-             'exp gcd hom inf ker lg '
-             'lim liminf limsup ln log max '
-             'min Pr sec sin sinh sup '
-             'tan tanh '
-             'injlim varinjlim varlimsup '
-             'projlim varliminf varprojlim').split()
-
-
-def _close_levels(node, levels, message):
-    """Close `levels` elements starting at node and return the result.
-
-    Raises SyntaxError(message), instead of the AttributeError that
-    close() gives, when an element without a parent would have to be
-    closed."""
-
-    for dummy in range(levels):
-        if not hasattr(node, 'parent'):
-            raise SyntaxError(message)
-        node = node.close()
-    return node
-
-
-def parse_latex_math(string, inline=True):
-    """parse_latex_math(string [,inline]) -> MathML-tree
-
-    Returns a MathML-tree parsed from string.  inline=True is for
-    inline math and inline=False is for displayed math.
-
-    Inline math is collected in a single mrow; displayed math in the
-    first cell of a table wrapped in an mstyle with displaystyle="true".
-
-    Raises SyntaxError for input that cannot be parsed.
-
-    tree is the whole tree and node is the current element."""
-
-    # Normalize white-space:
-    string = ' '.join(string.split())
-
-    if inline:
-        node = mrow()
-        tree = node
-    else:
-        node = mtd()
-        tree = mstyle(mtable(mtr(node)), displaystyle='true')
-
-    while len(string) > 0:
-        n = len(string)
-        c = string[0]
-        skip = 1  # number of characters consumed
-        if n > 1:
-            c2 = string[1]
-        else:
-            c2 = ''
-        if c == ' ':
-            pass
-        elif c == '\\':
-            if c2 == '':
-                # A lone backslash at the very end.  Must be tested before
-                # "c2 in '{}'": the empty string is "in" every string and
-                # used to end up as an empty <mo> element.
-                raise SyntaxError('Syntax error!')
-            elif c2 in '{}':
-                node = node.append(mo(c2))
-                skip = 2
-            elif c2 == ' ':
-                node = node.append(mspace())
-                skip = 2
-            elif c2.isalpha():
-                # We have a LaTeX-name:
-                i = 2
-                while i < n and string[i].isalpha():
-                    i += 1
-                name = string[1:i]
-                node, skip = handle_keyword(name, node, string[i:])
-                skip += i
-            elif c2 == '\\':
-                # End of a row: leave the cell and the row, then start a
-                # new row with a fresh first cell.
-                entry = mtd()
-                row = mtr(entry)
-                _close_levels(node, 2, 'Unexpected "\\\\"!').append(row)
-                node = entry
-                skip = 2
-            else:
-                raise SyntaxError('Syntax error!')
-        elif c.isalpha():
-            node = node.append(mi(c))
-        elif c.isdigit():
-            node = node.append(mn(c))
-        elif c in '+-/()[]|=<>,.':
-            node = node.append(mo(c))
-        elif c == '_':
-            if not node.children:
-                # Nothing precedes the subscript (used to be IndexError).
-                raise SyntaxError('Missing base for "_"!')
-            child = node.delete_child()
-            if isinstance(child, msup):
-                # Superscript came first: reuse base and superscript and
-                # let msubsup put them in the right order on output.
-                sub = msubsup(child.children[0:2], reversed=True)
-            else:
-                sub = msub(child)
-            node.append(sub)
-            node = sub
-        elif c == '^':
-            if not node.children:
-                # Nothing precedes the superscript (used to be IndexError).
-                raise SyntaxError('Missing base for "^"!')
-            child = node.delete_child()
-            if isinstance(child, msub):
-                sup = msubsup(child.children[0:2])
-            else:
-                sup = msup(child)
-            node.append(sup)
-            node = sup
-        elif c == '{':
-            row = mrow()
-            node.append(row)
-            node = row
-        elif c == '}':
-            node = _close_levels(node, 1, 'Unbalanced "}"!')
-        elif c == '&':
-            # Next cell of the current row.
-            entry = mtd()
-            _close_levels(node, 1, 'Unexpected "&"!').append(entry)
-            node = entry
-        else:
-            raise SyntaxError('Syntax error!')
-        string = string[skip:]
-    return tree
-
-
-# Double-struck ("blackboard bold") capitals: maps the letter given as
-# argument of the mathbb command to the Unicode character that
-# handle_keyword puts into an <mi> element.
-mathbb = {'A': u'\U0001D538',
- 'B': u'\U0001D539',
- 'C': u'\u2102',
- 'D': u'\U0001D53B',
- 'E': u'\U0001D53C',
- 'F': u'\U0001D53D',
- 'G': u'\U0001D53E',
- 'H': u'\u210D',
- 'I': u'\U0001D540',
- 'J': u'\U0001D541',
- 'K': u'\U0001D542',
- 'L': u'\U0001D543',
- 'M': u'\U0001D544',
- 'N': u'\u2115',
- 'O': u'\U0001D546',
- 'P': u'\u2119',
- 'Q': u'\u211A',
- 'R': u'\u211D',
- 'S': u'\U0001D54A',
- 'T': u'\U0001D54B',
- 'U': u'\U0001D54C',
- 'V': u'\U0001D54D',
- 'W': u'\U0001D54E',
- 'X': u'\U0001D54F',
- 'Y': u'\U0001D550',
- 'Z': u'\u2124'}
-
-# Operators that may follow the "not" command, mapped to the Unicode
-# character of the negated operator.  The backslashes are written as
-# '\\', like in the fence lists of handle_keyword: '\i' and '\e' are not
-# valid escape sequences and only by accident keep their backslash.
-negatables = {'=': u'\u2260',
-              '\\in': u'\u2209',
-              '\\equiv': u'\u2262'}
-
-
-def handle_keyword(name, node, string):
-    """handle_keyword(name, node, string) -> (node, skip)
-
-    Handles the LaTeX command name (given without the backslash).  node
-    is the current element and string the input that follows the command
-    name.  Returns the new current element and the number of characters
-    of string that were consumed; one blank directly after the name is
-    always consumed.
-
-    Raises SyntaxError for unknown commands and malformed arguments."""
-
-    skip = 0
-    if len(string) > 0 and string[0] == ' ':
-        string = string[1:]
-        skip = 1
-    # In all messages below the backslash is doubled: "\b" and "\t" in a
-    # plain string literal are a backspace and a tab character.
-    if name == 'begin':
-        if not string.startswith('{matrix}'):
-            raise SyntaxError('Expected "\\begin{matrix}"!')
-        skip += 8
-        entry = mtd()
-        table = mtable(mtr(entry))
-        node.append(table)
-        node = entry
-    elif name == 'end':
-        if not string.startswith('{matrix}'):
-            raise SyntaxError('Expected "\\end{matrix}"!')
-        skip += 8
-        try:
-            # Leave the cell, the row and the table.
-            node = node.close().close().close()
-        except AttributeError:
-            # Ran past the root of the tree: there is no open matrix.
-            raise SyntaxError('Unexpected "\\end{matrix}"!')
-    elif name == 'text':
-        # startswith() instead of string[0], which fails on empty input.
-        if not string.startswith('{'):
-            raise SyntaxError('Expected "\\text{...}"!')
-        i = string.find('}')
-        if i == -1:
-            raise SyntaxError('Expected "\\text{...}"!')
-        node = node.append(mtext(string[1:i]))
-        skip += i + 1
-    elif name == 'sqrt':
-        sqrt = msqrt()
-        node.append(sqrt)
-        node = sqrt
-    elif name == 'frac':
-        frac = mfrac()
-        node.append(frac)
-        node = frac
-    elif name == 'left':
-        for par in ['(', '[', '|', '\\{', '\\langle', '.']:
-            if string.startswith(par):
-                break
-        else:
-            raise SyntaxError('Missing left-brace!')
-        fenced = mfenced(par)
-        node.append(fenced)
-        node = fenced
-        skip += len(par)
-    elif name == 'right':
-        for par in [')', ']', '|', '\\}', '\\rangle', '.']:
-            if string.startswith(par):
-                break
-        else:
-            raise SyntaxError('Missing right-brace!')
-        if not isinstance(node, mfenced):
-            # Without this check the closing fence was stored on whatever
-            # element happened to be current.
-            raise SyntaxError(
-                'Unexpected "\\right" without a matching "\\left"!')
-        node.closepar = par
-        node = node.close()
-        skip += len(par)
-    elif name == 'not':
-        for operator in negatables:
-            if string.startswith(operator):
-                break
-        else:
-            raise SyntaxError('Expected something to negate: "\\not ..."!')
-        node = node.append(mo(negatables[operator]))
-        skip += len(operator)
-    elif name == 'mathbb':
-        # The length test avoids IndexError on short input; the lookup in
-        # mathbb (instead of isupper()) avoids KeyError for capitals that
-        # have no entry in the table.
-        if (len(string) < 3 or string[0] != '{'
-            or string[1] not in mathbb or string[2] != '}'):
-            raise SyntaxError('Expected something like "\\mathbb{A}"!')
-        node = node.append(mi(mathbb[string[1]]))
-        skip += 3
-    elif name == 'mathbf':
-        style = mstyle(nchildren=1, fontweight='bold')
-        node.append(style)
-        node = style
-    elif name in greek:
-        node = node.append(mi(greek[name]))
-    elif name in Greek:
-        node = node.append(mo(Greek[name]))
-    elif name in special:
-        node = node.append(mo(special[name]))
-    elif name in functions:
-        node = node.append(mo(name))
-    else:
-        accent = over.get(name)
-        if accent is None:
-            raise SyntaxError('Unknown LaTeX command: ' + name)
-        # Accent command: the accent character becomes the first child,
-        # the operand that follows in the input the second one.
-        ovr = mover(mo(accent))
-        node.append(ovr)
-        node = ovr
-
-    return node, skip
-
-
-# Description text handed to publish_cmdline below:
-description = ''.join(['Generates (X)HTML documents from standalone '
-                       'reStructuredText sources. ',
-                       default_description])
-
-# Runs at module level, i.e. as soon as the script is executed:
-publish_cmdline(writer_name='html', description=description)