diff options
| author | paultremblay <paultremblay@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-10-20 17:41:49 +0000 |
|---|---|---|
| committer | paultremblay <paultremblay@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2011-10-20 17:41:49 +0000 |
| commit | 56ae061bdecc136db6364b2bc84afdc54a2106cf (patch) | |
| tree | de47e19500eb45b64c3d74d405b0bdbed6d77bc6 /sandbox/paultremblay/scripts | |
| parent | 039095060363071676f1e37f5cf7d7d65059be84 (diff) | |
| download | docutils-56ae061bdecc136db6364b2bc84afdc54a2106cf.tar.gz | |
rstxml2mathml_sax.py will be a faster version of rstxml2mathml.py,
which is now much simpler.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@7191 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'sandbox/paultremblay/scripts')
| -rw-r--r-- | sandbox/paultremblay/scripts/rstxml2mathml.py | 245 | ||||
| -rw-r--r-- | sandbox/paultremblay/scripts/rstxml2mathml_sax.py | 239 |
2 files changed, 272 insertions, 212 deletions
diff --git a/sandbox/paultremblay/scripts/rstxml2mathml.py b/sandbox/paultremblay/scripts/rstxml2mathml.py index bbdf1513f..6f2a39ec4 100644 --- a/sandbox/paultremblay/scripts/rstxml2mathml.py +++ b/sandbox/paultremblay/scripts/rstxml2mathml.py @@ -1,220 +1,41 @@ -#!/usr/bin/python -# $Id: sax_complete_copy.py 54 2011-04-17 15:44:41Z cynthia $ - -import xml.sax.handler -from xml.sax.handler import feature_namespaces -import os, sys, argparse -from io import StringIO +import io, argparse, sys +import xml.etree.ElementTree as etree import asciimathml -from xml.etree.ElementTree import Element, tostring -import xml.etree.cElementTree as etree -import tempfile, subprocess, os -import docutils.math.latex2mathml - -if sys.version_info < (3,): - sys.stderr.write('Only run with pyton 3\n') - sys.stderr.write('Script now quiting\n') - sys.exit(1) - - -class CopyTree(xml.sax.ContentHandler): - - - - def __init__(self, mathml): - self.__characters = '' - self.__mathml = mathml - self.__ns_dict = {'http://www.w3.org/XML/1998/namespace': "xml"} - - - def characters (self, characters): - self.__characters += characters - - - def startElementNS(self, name, qname, attrs): - self.__write_text() - ns = name[0] - el_name = name[1] - sys.stdout.write('<') - if ns: - sys.stdout.write('ns1:%s' % el_name) - else: - sys.stdout.write(el_name) - if ns: - sys.stdout.write(' xmlns:ns1="%s"' % ns) - the_keys = list(attrs.keys()) - counter = 1 - for the_key in the_keys: - counter +=1 - ns_att = the_key[0] - att_name = the_key[1] - value = attrs[the_key] - ns_prefix = self.__ns_dict.get(ns_att) - if ns_att and not ns_prefix: - sys.stderr.write('No name space for "%s"\n' % (ns_att)) - sys.exit(1) - if ns_att and ns_prefix == 'xml': - sys.stdout.write(' xml:%s="%s"' % (att_name, value)) - elif ns_att: - sys.stderr.write('Sorry, but don\'t know what to do with ns "%s"\n' % (ns_prefix)) - sys.exit(1) -# if ns_att and ns_att != ns: -# sys.stdout.write(' xmlns:ns%s="%s"' % (counter,ns_att)) -# if 
ns_att and ns_att == ns: -# sys.stdout.write(' ns1:%s="%s"' % (att_name, value)) - else: - sys.stdout.write(' %s="%s"' % (att_name, value)) - sys.stdout.write('>') - - - def __write_text(self): - text = xml.sax.saxutils.escape(self.__characters) - sys.stdout.write(text) - self.__characters = '' - - def endElementNS(self, name, qname): - ns = name[0] - el_name = name[1] - if (el_name == 'math_block' and self.__mathml == 'ascii') or (el_name == 'math' and self.__mathml == 'ascii'): - raw_tree = asciimathml.parse(self.__characters)[0] - math_tree = Element('math', title="%s" % self.__characters, xmlns="http://www.w3.org/1998/Math/MathML") - math_tree.append(raw_tree) - string_tree = tostring(math_tree, encoding="utf-8").decode() - sys.stdout.write(string_tree) - self.__characters = '' - elif (el_name == 'math_block' and self.__mathml == 'latex') or (el_name == 'math' and self.__mathml == 'latex'): - raw_tree = self.__tralics() - if raw_tree == None: - self.__write_text() - else: - raw_tree = raw_tree[0] - math_tree = Element('math', title="%s" % self.__characters, xmlns="http://www.w3.org/1998/Math/MathML") - math_tree.append(raw_tree) - string_tree = tostring(math_tree, encoding="utf-8").decode() - sys.stdout.write(string_tree) - self.__characters = '' - else: - self.__write_text() - if ns: - sys.stderr.write('Should not be namespace "%s" here\n' % (ns)) - sys.exit(1) - sys.stdout.write('</ns1:%s>' % el_name) - else: - sys.stdout.write('</%s>' % el_name) - - def __python_latex_math(self): - """ - Python code seriously broken - - """ - try: - mathml_tree = docutils.math.latex2mathml.parse_latex_math(self.__characters) - except SyntaxError: - return self.__characters - math_code = ''.join(mathml_tree.xml()) - return math_code - - def __tralics(self): - num, tex_file = tempfile.mkstemp(suffix='.tex') - write_obj = open(tex_file, 'w') - write_obj.write('$') - write_obj.write(self.__characters) - write_obj.write('$') - write_obj.close() - num, bogus_out = 
tempfile.mkstemp() - bogus_out = open(bogus_out, 'w') - p = subprocess.call(['tralics', '-silent', '-utf8output', '-noentnames', tex_file], stdout=bogus_out) - bogus_out.close() - dir_name = os.path.dirname(tex_file) - filename, ext = os.path.splitext(tex_file) - xml_file = filename + '.xml' - log_file = filename + '.log' - xml_file = os.path.join(dir_name, xml_file) - if not os.path.isfile(xml_file): - sys.stderr.write('Cannot find file %s\n"' % xml_file) - sys.stderr.write('Bug, program now quiting\n') - sys.exit(1) - tree = etree.ElementTree() - read_obj = open(xml_file, 'r') - xml_tree = tree.parse(xml_file) - found = None - while not found: - for child in xml_tree: - if child.tag == '{http://www.w3.org/1998/Math/MathML}math': - found = 1 - break - try: - xml_tree = xml_tree[0] - except IndexError: - sys.stderr.write('Could not find any latex math\n') - break - if not found: - return None - return xml_tree - """ - line_to_read = 1 - xml_string = None - while line_to_read: - line_to_read = read_obj.readline() - line = line_to_read - search_obj = regexp.search(line) - if search_obj: - xml_string = (search_obj.group(0)) - read_obj.close() - if xml_string == None: - sys.stderr.write("found no math string\n") - sys.stderr.write('Bug, not quiting\n') - return xml_string - """ - -class ConverttoMathml: - - - def __init__(self): - pass - - def __parse_args(self): - desc = """Inserts Mathmx elements into an rst document. +def parse_args(): + desc = """Inserts Mathmx elements into an rst document. In order to use the script, first run rs2txml.py on the RST file. 
Then run this script on that resulting file Or, in one pass: rst2xml.py <infile> | python3 rstxml2mathml.py - """ - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--mathml', choices = ['latex', 'ascii'], nargs=1 ) # much better--demand an arg; the option is still optional - parser.add_argument('in_file', default = sys.stdin, nargs='?', - help = 'the file to input; default is standard in') - args = parser.parse_args() - return args - - def convert_to_mathml(self): - import io - args = self.__parse_args() - standard_in = False - in_file = args.in_file - mathml = args.mathml - if mathml: - mathml = mathml[0] - if isinstance(in_file, io.TextIOWrapper): - standard_in = True - the_string = sys.stdin.read() - if standard_in: - read_obj = StringIO(the_string) - else: - read_obj = open(in_file, 'r') - the_handle=CopyTree(mathml) - parser = xml.sax.make_parser() - parser.setFeature(feature_namespaces, 1) - parser.setContentHandler(the_handle) - parser.setFeature("http://xml.org/sax/features/external-general-entities", True) - try: - parser.parse(read_obj) - except xml.sax._exceptions.SAXParseException as msg: - print(str(msg)) - sys.exit(1) - read_obj.close() + """ + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('in_file', default = sys.stdin, nargs='?', + help = 'the file to input; default is standard in') + args = parser.parse_args() + return args + +def convert_to_mathml(): + args = parse_args() + standard_in = False + in_file = args.in_file + if isinstance(in_file, io.TextIOWrapper): + standard_in = True + the_string = sys.stdin.read() + xml_tree = etree.fromstring(the_string) + else: + xml_tree = etree.ElementTree().parse(in_file) + for element in xml_tree.iter('math_block'): + mathml_tree = asciimathml.parse(element.text) + mathml_tree.set("title", element.text) + mathml_tree.set("xmlns", "http://www.w3.org/1998/Math/MathML") + element.append(mathml_tree) + element.text = '' + string_tree = etree.tostring(xml_tree, 
encoding="utf-8") + if sys.version_info < (3,): + sys.stdout.write(string_tree) + else: + sys.stdout.write(string_tree.decode()) if __name__ == '__main__': - main_obj = ConverttoMathml() - main_obj.convert_to_mathml() + convert_to_mathml() diff --git a/sandbox/paultremblay/scripts/rstxml2mathml_sax.py b/sandbox/paultremblay/scripts/rstxml2mathml_sax.py new file mode 100644 index 000000000..2673aaf61 --- /dev/null +++ b/sandbox/paultremblay/scripts/rstxml2mathml_sax.py @@ -0,0 +1,239 @@ +#!/usr/bin/python +# $Id: sax_complete_copy.py 54 2011-04-17 15:44:41Z cynthia $ + +import xml.sax.handler +from xml.sax.handler import feature_namespaces +import os, sys, argparse +from io import StringIO +import asciimathml +from xml.etree.ElementTree import Element, tostring +import xml.etree.cElementTree as etree +import tempfile, subprocess, os +import docutils.math.latex2mathml + +if sys.version_info < (3,): + sys.stderr.write('Only run with pyton 3\n') + sys.stderr.write('Script now quiting\n') + sys.exit(1) + + +class CopyTree(xml.sax.ContentHandler): + + + + def __init__(self, mathml): + self.__characters = '' + self.__mathml = mathml + self.__ns_dict = {'http://www.w3.org/XML/1998/namespace': "xml"} + + + def characters (self, characters): + self.__characters += characters + + + def startElementNS(self, name, qname, attrs): + self.__write_text() + ns = name[0] + el_name = name[1] + sys.stdout.write('<') + if ns: + sys.stdout.write('ns1:%s' % el_name) + else: + sys.stdout.write(el_name) + if ns: + sys.stdout.write(' xmlns:ns1="%s"' % ns) + + the_keys = list(attrs.keys()) + counter = 1 + for the_key in the_keys: + counter +=1 + ns_att = the_key[0] + att_name = the_key[1] + value = attrs[the_key] + ns_prefix = self.__ns_dict.get(ns_att) + if ns_att and not ns_prefix: + sys.stderr.write('No name space for "%s"\n' % (ns_att)) + sys.exit(1) + if ns_att and ns_prefix == 'xml': + sys.stdout.write(' xml:%s="%s"' % (att_name, value)) + elif ns_att: + sys.stderr.write('Sorry, but 
don\'t know what to do with ns "%s"\n' % (ns_prefix)) + sys.exit(1) +# if ns_att and ns_att != ns: +# sys.stdout.write(' xmlns:ns%s="%s"' % (counter,ns_att)) +# if ns_att and ns_att == ns: +# sys.stdout.write(' ns1:%s="%s"' % (att_name, value)) + else: + sys.stdout.write(' %s="%s"' % (att_name, value)) + sys.stdout.write('>') + + + + def __write_text(self): + text = xml.sax.saxutils.escape(self.__characters) + sys.stdout.write(text) + self.__characters = '' + + def endElementNS(self, name, qname): + ns = name[0] + el_name = name[1] + if (el_name == 'math_block' and self.__mathml == 'ascii') or (el_name == 'math' and self.__mathml == 'ascii'): + raw_tree = asciimathml.parse(self.__characters)[0] + math_tree = Element('math', title="%s" % self.__characters, xmlns="http://www.w3.org/1998/Math/MathML") + math_tree.append(raw_tree) + string_tree = tostring(math_tree, encoding="utf-8").decode() + sys.stdout.write(string_tree) + self.__characters = '' + elif (el_name == 'math_block' and self.__mathml == 'latex') or (el_name == 'math' and self.__mathml == 'latex'): + raw_tree = self.__tralics() + if raw_tree == None: + self.__write_text() + else: + raw_tree = raw_tree[0] + math_tree = Element('math', title="%s" % self.__characters, xmlns="http://www.w3.org/1998/Math/MathML") + math_tree.append(raw_tree) + string_tree = tostring(math_tree, encoding="utf-8").decode() + sys.stdout.write(string_tree) + self.__characters = '' + else: + self.__write_text() + if ns: + sys.stderr.write('Should not be namespace "%s" here\n' % (ns)) + sys.exit(1) + sys.stdout.write('</ns1:%s>' % el_name) + else: + sys.stdout.write('</%s>' % el_name) + + def __python_latex_math(self): + """ + Python code seriously broken + + """ + try: + mathml_tree = docutils.math.latex2mathml.parse_latex_math(self.__characters) + except SyntaxError: + return self.__characters + math_code = ''.join(mathml_tree.xml()) + return math_code + + def __tralics(self): + num, tex_file = tempfile.mkstemp(suffix='.tex') + 
write_obj = open(tex_file, 'w') + write_obj.write('$') + write_obj.write(self.__characters) + write_obj.write('$') + write_obj.close() + num, bogus_out = tempfile.mkstemp() + bogus_out = open(bogus_out, 'w') + p = subprocess.call(['tralics', '-silent', '-utf8output', '-noentnames', tex_file], stdout=bogus_out) + bogus_out.close() + dir_name = os.path.dirname(tex_file) + filename, ext = os.path.splitext(tex_file) + xml_file = filename + '.xml' + log_file = filename + '.log' + xml_file = os.path.join(dir_name, xml_file) + if not os.path.isfile(xml_file): + sys.stderr.write('Cannot find file %s\n"' % xml_file) + sys.stderr.write('Bug, program now quiting\n') + sys.exit(1) + tree = etree.ElementTree() + read_obj = open(xml_file, 'r') + xml_tree = tree.parse(xml_file) + found = None + while not found: + for child in xml_tree: + if child.tag == '{http://www.w3.org/1998/Math/MathML}math': + found = 1 + break + try: + xml_tree = xml_tree[0] + except IndexError: + sys.stderr.write('Could not find any latex math\n') + break + if not found: + return None + return xml_tree + """ + line_to_read = 1 + xml_string = None + while line_to_read: + line_to_read = read_obj.readline() + line = line_to_read + search_obj = regexp.search(line) + if search_obj: + xml_string = (search_obj.group(0)) + read_obj.close() + if xml_string == None: + sys.stderr.write("found no math string\n") + sys.stderr.write('Bug, not quiting\n') + return xml_string + """ + +class ConverttoMathml: + + + def __init__(self): + pass + + def __parse_args(self): + desc = """Inserts Mathmx elements into an rst document. +In order to use the script, first run rs2txml.py on the RST file. 
+Then run this script on that resulting file +Or, in one pass: rst2xml.py <infile> | python3 rstxml2mathml.py + """ + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--mathml', choices = ['latex', 'ascii'], nargs=1 ) # much better--demand an arg; the option is still optional + parser.add_argument('in_file', default = sys.stdin, nargs='?', + help = 'the file to input; default is standard in') + args = parser.parse_args() + return args + + def convert_to_mathml(self): + import io + tree = etree.ElementTree() + args = self.__parse_args() + standard_in = False + in_file = args.in_file + if isinstance(in_file, io.TextIOWrapper): + standard_in = True + the_string = sys.stdin.read() + xml_tree = tree.fromstring(the_string) + else: + xml_tree = tree.parse(in_file) + for element in xml_tree.iter('math_block'): + mathml_tree = asciimathml.parse(element.text) + string_tree = tostring(mathml_tree).decode() + element.append(etree.XML(string_tree)) + string_tree = tostring(xml_tree, encoding="utf-8").decode() + sys.stdout.write(string_tree) + + def convert_to_mathml_(self): + import io + args = self.__parse_args() + standard_in = False + in_file = args.in_file + mathml = args.mathml + if mathml: + mathml = mathml[0] + if isinstance(in_file, io.TextIOWrapper): + standard_in = True + the_string = sys.stdin.read() + if standard_in: + read_obj = StringIO(the_string) + else: + read_obj = open(in_file, 'r') + the_handle=CopyTree(mathml) + parser = xml.sax.make_parser() + parser.setFeature(feature_namespaces, 1) + parser.setContentHandler(the_handle) + parser.setFeature("http://xml.org/sax/features/external-general-entities", True) + try: + parser.parse(read_obj) + except xml.sax._exceptions.SAXParseException as msg: + print(str(msg)) + sys.exit(1) + read_obj.close() + +if __name__ == '__main__': + main_obj = ConverttoMathml() + main_obj.convert_to_mathml() |
