diff options
author | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 14:49:51 +0100 |
---|---|---|
committer | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 14:49:51 +0100 |
commit | a498da43c7fdb9f24b73680c02a4a3588cc62d9a (patch) | |
tree | daf8119dae1749b5165b68033a1b23a7375ce9ce /i18n/hggettext | |
download | mercurial-tarball-a498da43c7fdb9f24b73680c02a4a3588cc62d9a.tar.gz |
Tarball conversion
Diffstat (limited to 'i18n/hggettext')
-rwxr-xr-x | i18n/hggettext | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/i18n/hggettext b/i18n/hggettext new file mode 100755 index 0000000..983459a --- /dev/null +++ b/i18n/hggettext @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# hggettext - carefully extract docstrings for Mercurial +# +# Copyright 2009 Matt Mackall <mpm@selenic.com> and others +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +# The normalize function is taken from pygettext which is distributed +# with Python under the Python License, which is GPL compatible. + +"""Extract docstrings from Mercurial commands. + +Compared to pygettext, this script knows about the cmdtable and table +dictionaries used by Mercurial, and will only extract docstrings from +functions mentioned therein. + +Use xgettext like normal to extract strings marked as translatable and +join the message cataloges to get the final catalog. +""" + +import os, sys, inspect + + +def escape(s): + # The order is important, the backslash must be escaped first + # since the other replacements introduce new backslashes + # themselves. + s = s.replace('\\', '\\\\') + s = s.replace('\n', '\\n') + s = s.replace('\r', '\\r') + s = s.replace('\t', '\\t') + s = s.replace('"', '\\"') + return s + + +def normalize(s): + # This converts the various Python string types into a format that + # is appropriate for .po files, namely much closer to C style. + lines = s.split('\n') + if len(lines) == 1: + s = '"' + escape(s) + '"' + else: + if not lines[-1]: + del lines[-1] + lines[-1] = lines[-1] + '\n' + lines = map(escape, lines) + lineterm = '\\n"\n"' + s = '""\n"' + lineterm.join(lines) + '"' + return s + + +def poentry(path, lineno, s): + return ('#: %s:%d\n' % (path, lineno) + + 'msgid %s\n' % normalize(s) + + 'msgstr ""\n') + + +def offset(src, doc, name, default): + """Compute offset or issue a warning on stdout.""" + # Backslashes in doc appear doubled in src. + end = src.find(doc.replace('\\', '\\\\')) + if end == -1: + # This can happen if the docstring contains unnecessary escape + # sequences such as \" in a triple-quoted string. The problem + # is that \" is turned into " and so doc wont appear in src. + sys.stderr.write("warning: unknown offset in %s, assuming %d lines\n" + % (name, default)) + return default + else: + return src.count('\n', 0, end) + + +def importpath(path): + """Import a path like foo/bar/baz.py and return the baz module.""" + if path.endswith('.py'): + path = path[:-3] + if path.endswith('/__init__'): + path = path[:-9] + path = path.replace('/', '.') + mod = __import__(path) + for comp in path.split('.')[1:]: + mod = getattr(mod, comp) + return mod + + +def docstrings(path): + """Extract docstrings from path. + + This respects the Mercurial cmdtable/table convention and will + only extract docstrings from functions mentioned in these tables. + """ + mod = importpath(path) + if mod.__doc__: + src = open(path).read() + lineno = 1 + offset(src, mod.__doc__, path, 7) + print poentry(path, lineno, mod.__doc__) + + functions = list(getattr(mod, 'i18nfunctions', [])) + functions = [(f, True) for f in functions] + + cmdtable = getattr(mod, 'cmdtable', {}) + if not cmdtable: + # Maybe we are processing mercurial.commands? + cmdtable = getattr(mod, 'table', {}) + functions.extend((c[0], False) for c in cmdtable.itervalues()) + + for func, rstrip in functions: + if func.__doc__: + src = inspect.getsource(func) + name = "%s.%s" % (path, func.__name__) + lineno = func.func_code.co_firstlineno + doc = func.__doc__ + if rstrip: + doc = doc.rstrip() + lineno += offset(src, doc, name, 1) + print poentry(path, lineno, doc) + + +def rawtext(path): + src = open(path).read() + print poentry(path, 1, src) + + +if __name__ == "__main__": + # It is very important that we import the Mercurial modules from + # the source tree where hggettext is executed. Otherwise we might + # accidentally import and extract strings from a Mercurial + # installation mentioned in PYTHONPATH. + sys.path.insert(0, os.getcwd()) + from mercurial import demandimport; demandimport.enable() + for path in sys.argv[1:]: + if path.endswith('.txt'): + rawtext(path) + else: + docstrings(path) |