diff options
Diffstat (limited to 'docutils/tools/dev/create_unimap.py')
-rwxr-xr-x | docutils/tools/dev/create_unimap.py | 82 |
1 files changed, 0 insertions, 82 deletions
diff --git a/docutils/tools/dev/create_unimap.py b/docutils/tools/dev/create_unimap.py deleted file mode 100755 index 1d1a2f8a0..000000000 --- a/docutils/tools/dev/create_unimap.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# Author: Felix Wiemann -# Contact: Felix_Wiemann@ososo.de -# Revision: $Revision$ -# Date: $Date$ -# Copyright: This file has been placed in the public domain. - -# Call: create_unimap.py < unicode.xml > unicode_latex.py -# -# Get unicode.xml from -# <http://www.w3.org/2003/entities/xml/unicode.xml>. - -from xml.dom import minidom -import sys -import pprint - -def w(s): - if isinstance(s, unicode): - s = s.encode('utf8') - sys.stdout.write(s) - -text_map = {} -math_map = {} - -class Visitor: - - """Node visitor for contents of unicode.xml.""" - - def visit_character(self, node): - for n in node.childNodes: - if n.nodeName == 'latex': - code = node.attributes['dec'].value - if '-' in code: - # I don't know what this means, but we probably - # don't need it.... - continue - if int(code) < 128: - # Wrong (maps "-" to "$-$", which is too wide) and - # unnecessary (maps "a" to "{a}"). - continue - latex_code = n.childNodes[0].nodeValue.encode('ascii').strip() - if node.attributes['mode'].value == 'math': - math_map[unichr(int(code))] = '$%s$' % latex_code - else: - text_map[unichr(int(code))] = '{%s}' % latex_code - -def call_visitor(node, visitor=Visitor()): - if isinstance(node, minidom.Text): - name = 'Text' - else: - name = node.nodeName.replace('#', '_') - if hasattr(visitor, 'visit_' + name): - getattr(visitor, 'visit_' + name)(node) - for child in node.childNodes: - call_visitor(child) - if hasattr(visitor, 'depart_' + name): - getattr(visitor, 'depart_' + name)(node) - -document = minidom.parse(sys.stdin) -call_visitor(document) - -unicode_map = math_map -unicode_map.update(text_map) -# Now unicode_map contains the text entries plus dollar-enclosed math -# entries for those chars for which no text entry exists. - -print '# Author: Felix Wiemann' -print '# Contact: Felix_Wiemann@ososo.de' -print '# Revision: $%s$' % 'Revision' -print '# Date: $%s$' % 'Date' -print '# Copyright: This file has been placed in the public domain.' -print -print '# This is a mapping of Unicode characters to LaTeX equivalents.' -print '# The information has been extracted from' -print '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by' -print '# David Carlisle and Sebastian Rahtz.' -print '#' -print '# The extraction has been done by the "create_unimap.py" script' -print '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.' -print -print 'unicode_map = %s' % pprint.pformat(unicode_map, indent=0) |