summaryrefslogtreecommitdiff
path: root/i18n/hggettext
diff options
context:
space:
mode:
authorLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 14:49:51 +0100
committerLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 14:49:51 +0100
commita498da43c7fdb9f24b73680c02a4a3588cc62d9a (patch)
treedaf8119dae1749b5165b68033a1b23a7375ce9ce /i18n/hggettext
downloadmercurial-tarball-a498da43c7fdb9f24b73680c02a4a3588cc62d9a.tar.gz
Tarball conversion
Diffstat (limited to 'i18n/hggettext')
-rwxr-xr-xi18n/hggettext137
1 files changed, 137 insertions, 0 deletions
diff --git a/i18n/hggettext b/i18n/hggettext
new file mode 100755
index 0000000..983459a
--- /dev/null
+++ b/i18n/hggettext
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+#
+# hggettext - carefully extract docstrings for Mercurial
+#
+# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+# The normalize function is taken from pygettext which is distributed
+# with Python under the Python License, which is GPL compatible.
+
+"""Extract docstrings from Mercurial commands.
+
+Compared to pygettext, this script knows about the cmdtable and table
+dictionaries used by Mercurial, and will only extract docstrings from
+functions mentioned therein.
+
+Use xgettext like normal to extract strings marked as translatable and
+join the message catalogs to get the final catalog.
+"""
+
+import os, sys, inspect
+
+
def escape(s):
    """Return s with the characters special to .po strings escaped.

    Backslash, newline, carriage return, tab and double quote are each
    replaced by their two-character, C-style escape sequence.
    """
    # Backslash has to be handled first: every later substitution
    # inserts backslashes of its own, and those must not be escaped a
    # second time.
    substitutions = (
        ('\\', '\\\\'),
        ('\n', '\\n'),
        ('\r', '\\r'),
        ('\t', '\\t'),
        ('"', '\\"'),
    )
    for raw, quoted in substitutions:
        s = s.replace(raw, quoted)
    return s
+
+
def normalize(s):
    """Format s as a quoted string suitable for a .po file.

    A string without newlines becomes a single quoted, escaped string.
    A multi-line string becomes an empty quoted string followed by one
    quoted string per input line, which is the C-like layout used for
    long messages in .po files.
    """
    parts = s.split('\n')
    if len(parts) == 1:
        return '"' + escape(s) + '"'

    if not parts[-1]:
        # s ended with a newline: drop the empty tail produced by the
        # split and give the newline back to the real last line so it
        # is escaped together with that line.
        parts.pop()
        parts[-1] += '\n'

    escaped = [escape(part) for part in parts]
    # Every line but the last is closed with an escaped newline and a
    # quote, and the next line is opened with a fresh quote.
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
+
+
def poentry(path, lineno, s):
    """Return a .po entry for s found at path:lineno.

    The entry consists of a location comment, the normalized msgid and
    an empty msgstr, each terminated by a newline.
    """
    location = '#: %s:%d\n' % (path, lineno)
    msgid = 'msgid %s\n' % normalize(s)
    return location + msgid + 'msgstr ""\n'
+
+
def offset(src, doc, name, default):
    """Return the number of newlines in src that precede doc.

    src is source text and doc is a docstring expected to occur in it.
    If doc cannot be located, a warning mentioning name is written to
    stderr and default is returned instead.
    """
    # In an ordinary string literal, backslashes in doc appear doubled
    # in src, so look for that form first.
    end = src.find(doc.replace('\\', '\\\\'))
    if end == -1 and '\\' in doc:
        # In a raw docstring (r"""...""") the backslashes are not
        # doubled, so doc appears in src verbatim.
        end = src.find(doc)
    if end == -1:
        # This can happen if the docstring contains unnecessary escape
        # sequences such as \" in a triple-quoted string. The problem
        # is that \" is turned into " and so doc won't appear in src.
        sys.stderr.write("warning: unknown offset in %s, assuming %d lines\n"
                         % (name, default))
        return default
    return src.count('\n', 0, end)
+
+
def importpath(path):
    """Import a file path such as foo/bar/baz.py and return module baz.

    A trailing '.py' and then a trailing '/__init__' are stripped, the
    remaining slashes are turned into dots, and the resulting dotted
    name is imported.
    """
    dotted = path
    for suffix in ('.py', '/__init__'):
        if dotted.endswith(suffix):
            dotted = dotted[:-len(suffix)]
    dotted = dotted.replace('/', '.')

    # __import__ returns the top-level package, so walk down the
    # remaining components to reach the module that was asked for.
    mod = __import__(dotted)
    for attr in dotted.split('.')[1:]:
        mod = getattr(mod, attr)
    return mod
+
+
def docstrings(path):
    """Extract docstrings from path and print them as .po entries.

    This respects the Mercurial cmdtable/table convention and will
    only extract docstrings from functions mentioned in these tables,
    plus any functions listed in the module's i18nfunctions attribute
    and the module docstring itself.
    """
    mod = importpath(path)
    if mod.__doc__:
        # Close the file explicitly instead of relying on garbage
        # collection to release the handle.
        fp = open(path)
        try:
            src = fp.read()
        finally:
            fp.close()
        # 7 is the fallback line offset used when the module docstring
        # cannot be located in the file (presumably the usual length of
        # the header comment -- confirm).
        lineno = 1 + offset(src, mod.__doc__, path, 7)
        print(poentry(path, lineno, mod.__doc__))

    # Each entry is (function, rstrip): docstrings of i18nfunctions have
    # trailing whitespace stripped, command docstrings are kept as-is.
    functions = [(f, True) for f in getattr(mod, 'i18nfunctions', [])]

    cmdtable = getattr(mod, 'cmdtable', {})
    if not cmdtable:
        # Maybe we are processing mercurial.commands?
        cmdtable = getattr(mod, 'table', {})
    # The first element of each table value is the command function.
    functions.extend((c[0], False) for c in cmdtable.values())

    for func, rstrip in functions:
        if func.__doc__:
            src = inspect.getsource(func)
            name = "%s.%s" % (path, func.__name__)
            # __code__ is the portable spelling; fall back to func_code
            # for interpreters that only provide the old attribute.
            code = getattr(func, '__code__', None) or func.func_code
            lineno = code.co_firstlineno
            doc = func.__doc__
            if rstrip:
                doc = doc.rstrip()
            # Fall back to one line below the def when the docstring
            # cannot be found in the function source.
            lineno += offset(src, doc, name, 1)
            print(poentry(path, lineno, doc))
+
+
def rawtext(path):
    """Print a .po entry whose msgid is the whole content of path.

    The entry is reported as located at line 1 of path.
    """
    # Close the file explicitly instead of relying on garbage
    # collection to release the handle.
    fp = open(path)
    try:
        src = fp.read()
    finally:
        fp.close()
    print(poentry(path, 1, src))
+
+
if __name__ == "__main__":
    # Put the current directory first on the module search path so the
    # Mercurial modules come from the source tree hggettext is run in.
    # Without this, strings could accidentally be extracted from some
    # other Mercurial installation reachable through PYTHONPATH.
    sys.path.insert(0, os.getcwd())
    from mercurial import demandimport
    demandimport.enable()

    # Text files are emitted whole; anything else is imported as a
    # module and has its docstrings extracted.
    for path in sys.argv[1:]:
        if not path.endswith('.txt'):
            docstrings(path)
        else:
            rawtext(path)