summary | refs | log | tree | commit | diff
path: root/tools/dev/create_unimap.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dev/create_unimap.py')
-rwxr-xr-x tools/dev/create_unimap.py 82
1 files changed, 82 insertions, 0 deletions
diff --git a/tools/dev/create_unimap.py b/tools/dev/create_unimap.py
new file mode 100755
index 000000000..1d1a2f8a0
--- /dev/null
+++ b/tools/dev/create_unimap.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+# Author: Felix Wiemann
+# Contact: Felix_Wiemann@ososo.de
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This file has been placed in the public domain.
+
+# Call: create_unimap.py < unicode.xml > unicode_latex.py
+#
+# Get unicode.xml from
+# <http://www.w3.org/2003/entities/xml/unicode.xml>.
+
+from xml.dom import minidom
+import sys
+import pprint
+
+def w(s):
+ if isinstance(s, unicode):
+ s = s.encode('utf8')
+ sys.stdout.write(s)
+
+text_map = {}
+math_map = {}
+
+class Visitor:
+
+ """Node visitor for contents of unicode.xml."""
+
+ def visit_character(self, node):
+ for n in node.childNodes:
+ if n.nodeName == 'latex':
+ code = node.attributes['dec'].value
+ if '-' in code:
+ # I don't know what this means, but we probably
+ # don't need it....
+ continue
+ if int(code) < 128:
+ # Wrong (maps "-" to "$-$", which is too wide) and
+ # unnecessary (maps "a" to "{a}").
+ continue
+ latex_code = n.childNodes[0].nodeValue.encode('ascii').strip()
+ if node.attributes['mode'].value == 'math':
+ math_map[unichr(int(code))] = '$%s$' % latex_code
+ else:
+ text_map[unichr(int(code))] = '{%s}' % latex_code
+
+def call_visitor(node, visitor=Visitor()):
+ if isinstance(node, minidom.Text):
+ name = 'Text'
+ else:
+ name = node.nodeName.replace('#', '_')
+ if hasattr(visitor, 'visit_' + name):
+ getattr(visitor, 'visit_' + name)(node)
+ for child in node.childNodes:
+ call_visitor(child)
+ if hasattr(visitor, 'depart_' + name):
+ getattr(visitor, 'depart_' + name)(node)
+
+document = minidom.parse(sys.stdin)
+call_visitor(document)
+
+unicode_map = math_map
+unicode_map.update(text_map)
+# Now unicode_map contains the text entries plus dollar-enclosed math
+# entries for those chars for which no text entry exists.
+
+print '# Author: Felix Wiemann'
+print '# Contact: Felix_Wiemann@ososo.de'
+print '# Revision: $%s$' % 'Revision'
+print '# Date: $%s$' % 'Date'
+print '# Copyright: This file has been placed in the public domain.'
+print
+print '# This is a mapping of Unicode characters to LaTeX equivalents.'
+print '# The information has been extracted from'
+print '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by'
+print '# David Carlisle and Sebastian Rahtz.'
+print '#'
+print '# The extraction has been done by the "create_unimap.py" script'
+print '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.'
+print
+print 'unicode_map = %s' % pprint.pformat(unicode_map, indent=0)