summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gustavo Niemeyer <gustavo@niemeyer.net> 2004-07-22 18:44:01 +0000
committer Gustavo Niemeyer <gustavo@niemeyer.net> 2004-07-22 18:44:01 +0000
commit 895b2faf57d64935616cb1e98919ca66cfa15604 (patch)
tree d9563eaa8f4b67260dae289ff96dcfc6813f1850
parent 9c23abff33b146cb173bf3a27b1cbeefef4a5c66 (diff)
download cpython-895b2faf57d64935616cb1e98919ca66cfa15604.tar.gz
This change implements the following gettext features, as
discussed recently in python-dev:

In _locale module:
- bind_textdomain_codeset() binding

In gettext module:
- bind_textdomain_codeset() function
- lgettext(), lngettext(), ldgettext(), ldngettext(),
  which return translated strings encoded in
  preferred system encoding, if
  bind_textdomain_codeset() was not used.
- Added equivalent functionality in translate()
  function and catalog classes.

Every change was also documented.
-rw-r--r-- Doc/lib/libgettext.tex 132
-rw-r--r-- Doc/lib/liblocale.tex 11
-rw-r--r-- Lib/gettext.py 105
-rw-r--r-- Misc/NEWS 8
-rw-r--r-- Modules/_localemodule.c 22
-rw-r--r-- configure.in 4
-rw-r--r-- pyconfig.h.in 3
7 files changed, 256 insertions, 29 deletions
diff --git a/Doc/lib/libgettext.tex b/Doc/lib/libgettext.tex
index f2c20f8d3d..57d09d0e98 100644
--- a/Doc/lib/libgettext.tex
+++ b/Doc/lib/libgettext.tex
@@ -51,6 +51,14 @@ for \var{domain} is returned.\footnote{
the start of your application.}
\end{funcdesc}
+\begin{funcdesc}{bind_textdomain_codeset}{domain\optional{, codeset}}
+Bind the \var{domain} to \var{codeset}, changing the encoding of
+strings returned by the \function{gettext()} family of functions.
+If \var{codeset} is omitted, then the current binding is returned.
+
+\versionadded{2.4}
+\end{funcdesc}
+
\begin{funcdesc}{textdomain}{\optional{domain}}
Change or query the current global domain. If \var{domain} is
\code{None}, then the current global domain is returned, otherwise the
@@ -64,11 +72,27 @@ is usually aliased as \function{_} in the local namespace (see
examples below).
\end{funcdesc}
+\begin{funcdesc}{lgettext}{message}
+Equivalent to \function{gettext()}, but the translation is returned
+in the preferred system encoding, if no other encoding was explicitly
+set with \function{bind_textdomain_codeset()}.
+
+\versionadded{2.4}
+\end{funcdesc}
+
\begin{funcdesc}{dgettext}{domain, message}
Like \function{gettext()}, but look the message up in the specified
\var{domain}.
\end{funcdesc}
+\begin{funcdesc}{ldgettext}{domain, message}
+Equivalent to \function{dgettext()}, but the translation is returned
+in the preferred system encoding, if no other encoding was explicitly
+set with \function{bind_textdomain_codeset()}.
+
+\versionadded{2.4}
+\end{funcdesc}
+
\begin{funcdesc}{ngettext}{singular, plural, n}
Like \function{gettext()}, but consider plural forms. If a translation
@@ -87,6 +111,14 @@ formulas for a variety of languages.
\end{funcdesc}
+\begin{funcdesc}{lngettext}{singular, plural, n}
+Equivalent to \function{ngettext()}, but the translation is returned
+in the preferred system encoding, if no other encoding was explicitly
+set with \function{bind_textdomain_codeset()}.
+
+\versionadded{2.4}
+\end{funcdesc}
+
\begin{funcdesc}{dngettext}{domain, singular, plural, n}
Like \function{ngettext()}, but look the message up in the specified
\var{domain}.
@@ -94,6 +126,15 @@ Like \function{ngettext()}, but look the message up in the specified
\versionadded{2.3}
\end{funcdesc}
+\begin{funcdesc}{ldngettext}{domain, singular, plural, n}
+Equivalent to \function{dngettext()}, but the translation is returned
+in the preferred system encoding, if no other encoding was explicitly
+set with \function{bind_textdomain_codeset()}.
+
+\versionadded{2.4}
+\end{funcdesc}
+
+
Note that GNU \program{gettext} also defines a \function{dcgettext()}
method, but this was deemed not useful and so it is currently
@@ -152,8 +193,8 @@ they appear in the languages list or the environment variables.
\end{funcdesc}
\begin{funcdesc}{translation}{domain\optional{, localedir\optional{,
- languages\optional{,
- class_,\optional{fallback}}}}}
+ languages\optional{, class_\optional{,
+ fallback\optional{, codeset}}}}}}
Return a \class{Translations} instance based on the \var{domain},
\var{localedir}, and \var{languages}, which are first passed to
\function{find()} to get a list of the
@@ -161,7 +202,8 @@ associated \file{.mo} file paths. Instances with
identical \file{.mo} file names are cached. The actual class instantiated
is either \var{class_} if provided, otherwise
\class{GNUTranslations}. The class's constructor must take a single
-file object argument.
+file object argument. If provided, \var{codeset} will change the
+charset used to encode translated strings.
If multiple files are found, later files are used as fallbacks for
earlier ones. To allow setting the fallback, \function{copy.copy}
@@ -172,13 +214,17 @@ If no \file{.mo} file is found, this function raises
\exception{IOError} if \var{fallback} is false (which is the default),
and returns a \class{NullTranslations} instance if \var{fallback} is
true.
+
+\versionchanged[Added the \var{codeset} parameter]{2.4}
\end{funcdesc}
-\begin{funcdesc}{install}{domain\optional{, localedir\optional{, unicode}}}
+\begin{funcdesc}{install}{domain\optional{, localedir\optional{, unicode
+ \optional{, codeset}}}}
This installs the function \function{_} in Python's builtin namespace,
-based on \var{domain}, and \var{localedir} which are passed to the
-function \function{translation()}. The \var{unicode} flag is passed to
-the resulting translation object's \method{install} method.
+based on \var{domain}, \var{localedir}, and \var{codeset} which are
+passed to the function \function{translation()}. The \var{unicode}
+flag is passed to the resulting translation object's \method{install}
+method.
As seen below, you usually mark the strings in your application that are
candidates for translation, by wrapping them in a call to the
@@ -191,6 +237,8 @@ print _('This string will be translated.')
For convenience, you want the \function{_()} function to be installed in
Python's builtin namespace, so it is easily accessible in all modules
of your application.
+
+\versionchanged[Added the \var{codeset} parameter]{2.4}
\end{funcdesc}
\subsubsection{The \class{NullTranslations} class}
@@ -223,25 +271,39 @@ provide a translation for a given message.
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{gettext}{message}
-If a fallback has been set, forward \method{gettext} to the fallback.
+If a fallback has been set, forward \method{gettext()} to the fallback.
+Otherwise, return the translated message. Overridden in derived classes.
+\end{methoddesc}
+
+\begin{methoddesc}[NullTranslations]{lgettext}{message}
+If a fallback has been set, forward \method{lgettext()} to the fallback.
Otherwise, return the translated message. Overridden in derived classes.
+
+\versionadded{2.4}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{ugettext}{message}
-If a fallback has been set, forward \method{ugettext} to the fallback.
+If a fallback has been set, forward \method{ugettext()} to the fallback.
Otherwise, return the translated message as a Unicode string.
Overridden in derived classes.
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{ngettext}{singular, plural, n}
-If a fallback has been set, forward \method{ngettext} to the fallback.
+If a fallback has been set, forward \method{ngettext()} to the fallback.
Otherwise, return the translated message. Overridden in derived classes.
\versionadded{2.3}
\end{methoddesc}
+\begin{methoddesc}[NullTranslations]{lngettext}{singular, plural, n}
+If a fallback has been set, forward \method{lngettext()} to the fallback.
+Otherwise, return the translated message. Overridden in derived classes.
+
+\versionadded{2.4}
+\end{methoddesc}
+
\begin{methoddesc}[NullTranslations]{ungettext}{singular, plural, n}
-If a fallback has been set, forward \method{ungettext} to the fallback.
+If a fallback has been set, forward \method{ungettext()} to the fallback.
Otherwise, return the translated message as a Unicode string.
Overridden in derived classes.
@@ -256,6 +318,20 @@ Return the ``protected'' \member{_info} variable.
Return the ``protected'' \member{_charset} variable.
\end{methoddesc}
+\begin{methoddesc}[NullTranslations]{output_charset}{}
+Return the ``protected'' \member{_output_charset} variable, which
+defines the encoding used to return translated messages.
+
+\versionadded{2.4}
+\end{methoddesc}
+
+\begin{methoddesc}[NullTranslations]{set_output_charset}{charset}
+Change the ``protected'' \member{_output_charset} variable, which
+defines the encoding used to return translated messages.
+
+\versionadded{2.4}
+\end{methoddesc}
+
\begin{methoddesc}[NullTranslations]{install}{\optional{unicode}}
If the \var{unicode} flag is false, this method installs
\method{self.gettext()} into the built-in namespace, binding it to
@@ -323,6 +399,14 @@ look up is forwarded to the fallback's \method{gettext()} method.
Otherwise, the \var{message} id is returned.
\end{methoddesc}
+\begin{methoddesc}[GNUTranslations]{lgettext}{message}
+Equivalent to \method{gettext()}, but the translation is returned
+in the preferred system encoding, if no other encoding was explicitly
+set with \method{set_output_charset()}.
+
+\versionadded{2.4}
+\end{methoddesc}
+
\begin{methoddesc}[GNUTranslations]{ugettext}{message}
Look up the \var{message} id in the catalog and return the
corresponding message string, as a Unicode string. If there is no
@@ -346,6 +430,14 @@ returned, and \var{plural} is returned in all other cases.
\versionadded{2.3}
\end{methoddesc}
+\begin{methoddesc}[GNUTranslations]{lngettext}{singular, plural, n}
+Equivalent to \method{ngettext()}, but the translation is returned
+in the preferred system encoding, if no other encoding was explicitly
+set with \method{set_output_charset()}.
+
+\versionadded{2.4}
+\end{methoddesc}
+
\begin{methoddesc}[GNUTranslations]{ungettext}{singular, plural, n}
Do a plural-forms lookup of a message id. \var{singular} is used as
the message id for purposes of lookup in the catalog, while \var{n} is
@@ -495,7 +587,7 @@ you would put at the top of your module:
\begin{verbatim}
import gettext
t = gettext.translation('spam', '/usr/share/locale')
-_ = t.gettext
+_ = t.lgettext
\end{verbatim}
If your translators were providing you with Unicode strings in their
@@ -633,6 +725,21 @@ program to look for translatable strings marked with \function{N_()}.
\program{pygettext} and \program{xpot} both support this through the
use of command line switches.
+\subsubsection{\function{gettext()} vs. \function{lgettext()}}
+In Python 2.4 the \function{lgettext()} family of functions was
+introduced. The intention of these functions is to provide an
+alternative which is more compliant with the current
+implementation of GNU gettext. Unlike \function{gettext()}, which
+returns strings encoded with the same codeset used in the
+translation file, \function{lgettext()} will return strings
+encoded with the preferred system encoding, as returned by
+\function{locale.getpreferredencoding()}. Also notice that
+Python 2.4 introduces new functions to explicitly choose
+the codeset used in translated strings. If a codeset is explicitly
+set, even \function{lgettext()} will return translated strings in
+the requested codeset, as would be expected in the GNU gettext
+implementation.
+
\subsection{Acknowledgements}
The following people contributed code, feedback, design suggestions,
@@ -647,4 +754,5 @@ this module:
\item Martin von L\"owis
\item Fran\c cois Pinard
\item Barry Warsaw
+ \item Gustavo Niemeyer
\end{itemize}
diff --git a/Doc/lib/liblocale.tex b/Doc/lib/liblocale.tex
index bc96189915..c51bf20b4b 100644
--- a/Doc/lib/liblocale.tex
+++ b/Doc/lib/liblocale.tex
@@ -469,15 +469,16 @@ that the \module{_locale} module is not accessible as a shared library.
The locale module exposes the C library's gettext interface on systems
that provide this interface. It consists of the functions
\function{gettext()}, \function{dgettext()}, \function{dcgettext()},
-\function{textdomain()}, and \function{bindtextdomain()}. These are
-similar to the same functions in the \refmodule{gettext} module, but use
-the C library's binary format for message catalogs, and the C
-library's search algorithms for locating message catalogs.
+\function{textdomain()}, \function{bindtextdomain()}, and
+\function{bind_textdomain_codeset()}. These are similar to the same
+functions in the \refmodule{gettext} module, but use the C library's
+binary format for message catalogs, and the C library's search
+algorithms for locating message catalogs.
Python applications should normally find no need to invoke these
functions, and should use \refmodule{gettext} instead. A known
exception to this rule are applications that link use additional C
libraries which internally invoke \cfunction{gettext()} or
-\function{cdgettext()}. For these applications, it may be necessary to
+\function{dcgettext()}. For these applications, it may be necessary to
bind the text domain, so that the libraries can properly locate their
message catalogs.
diff --git a/Lib/gettext.py b/Lib/gettext.py
index 920aa4642a..6e291766a5 100644
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -46,7 +46,7 @@ internationalized, to the local language and cultural habits.
# find this format documented anywhere.
-import copy, os, re, struct, sys
+import locale, copy, os, re, struct, sys
from errno import ENOENT
@@ -171,6 +171,7 @@ class NullTranslations:
def __init__(self, fp=None):
self._info = {}
self._charset = None
+ self._output_charset = None
self._fallback = None
if fp is not None:
self._parse(fp)
@@ -189,6 +190,11 @@ class NullTranslations:
return self._fallback.gettext(message)
return message
+ def lgettext(self, message):
+ if self._fallback:
+ return self._fallback.lgettext(message)
+ return message
+
def ngettext(self, msgid1, msgid2, n):
if self._fallback:
return self._fallback.ngettext(msgid1, msgid2, n)
@@ -197,6 +203,14 @@ class NullTranslations:
else:
return msgid2
+ def lngettext(self, msgid1, msgid2, n):
+ if self._fallback:
+ return self._fallback.lngettext(msgid1, msgid2, n)
+ if n == 1:
+ return msgid1
+ else:
+ return msgid2
+
def ugettext(self, message):
if self._fallback:
return self._fallback.ugettext(message)
@@ -216,6 +230,12 @@ class NullTranslations:
def charset(self):
return self._charset
+ def output_charset(self):
+ return self._output_charset
+
+ def set_output_charset(self, charset):
+ self._output_charset = charset
+
def install(self, unicode=False):
import __builtin__
__builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
@@ -315,14 +335,29 @@ class GNUTranslations(NullTranslations):
return self._fallback.gettext(message)
return message
# Encode the Unicode tmsg back to an 8-bit string, if possible
- if self._charset:
+ if self._output_charset:
+ return tmsg.encode(self._output_charset)
+ elif self._charset:
return tmsg.encode(self._charset)
return tmsg
+ def lgettext(self, message):
+ missing = object()
+ tmsg = self._catalog.get(message, missing)
+ if tmsg is missing:
+ if self._fallback:
+ return self._fallback.lgettext(message)
+ return message
+ if self._output_charset:
+ return tmsg.encode(self._output_charset)
+ return tmsg.encode(locale.getpreferredencoding())
+
def ngettext(self, msgid1, msgid2, n):
try:
tmsg = self._catalog[(msgid1, self.plural(n))]
- if self._charset:
+ if self._output_charset:
+ return tmsg.encode(self._output_charset)
+ elif self._charset:
return tmsg.encode(self._charset)
return tmsg
except KeyError:
@@ -333,6 +368,20 @@ class GNUTranslations(NullTranslations):
else:
return msgid2
+ def lngettext(self, msgid1, msgid2, n):
+ try:
+ tmsg = self._catalog[(msgid1, self.plural(n))]
+ if self._output_charset:
+ return tmsg.encode(self._output_charset)
+ return tmsg.encode(locale.getpreferredencoding())
+ except KeyError:
+ if self._fallback:
+ return self._fallback.lngettext(msgid1, msgid2, n)
+ if n == 1:
+ return msgid1
+ else:
+ return msgid2
+
def ugettext(self, message):
missing = object()
tmsg = self._catalog.get(message, missing)
@@ -397,7 +446,7 @@ def find(domain, localedir=None, languages=None, all=0):
_translations = {}
def translation(domain, localedir=None, languages=None,
- class_=None, fallback=False):
+ class_=None, fallback=False, codeset=None):
if class_ is None:
class_ = GNUTranslations
mofiles = find(domain, localedir, languages, all=1)
@@ -414,9 +463,12 @@ def translation(domain, localedir=None, languages=None,
t = _translations.get(key)
if t is None:
t = _translations.setdefault(key, class_(open(mofile, 'rb')))
- # Copy the translation object to allow setting fallbacks.
- # All other instance data is shared with the cached object.
+ # Copy the translation object to allow setting fallbacks and
+ # output charset. All other instance data is shared with the
+ # cached object.
t = copy.copy(t)
+ if codeset:
+ t.set_output_charset(codeset)
if result is None:
result = t
else:
@@ -424,13 +476,16 @@ def translation(domain, localedir=None, languages=None,
return result
-def install(domain, localedir=None, unicode=False):
- translation(domain, localedir, fallback=True).install(unicode)
+def install(domain, localedir=None, unicode=False, codeset=None):
+ t = translation(domain, localedir, fallback=True, codeset=codeset)
+ t.install(unicode)
# a mapping b/w domains and locale directories
_localedirs = {}
+# a mapping b/w domains and codesets
+_localecodesets = {}
# current global domain, `messages' used for compatibility w/ GNU gettext
_current_domain = 'messages'
@@ -449,17 +504,33 @@ def bindtextdomain(domain, localedir=None):
return _localedirs.get(domain, _default_localedir)
+def bind_textdomain_codeset(domain, codeset=None):
+ global _localecodesets
+ if codeset is not None:
+ _localecodesets[domain] = codeset
+ return _localecodesets.get(domain)
+
+
def dgettext(domain, message):
try:
- t = translation(domain, _localedirs.get(domain, None))
+ t = translation(domain, _localedirs.get(domain, None),
+ codeset=_localecodesets.get(domain))
except IOError:
return message
return t.gettext(message)
+def ldgettext(domain, message):
+ try:
+ t = translation(domain, _localedirs.get(domain, None),
+ codeset=_localecodesets.get(domain))
+ except IOError:
+ return message
+ return t.lgettext(message)
def dngettext(domain, msgid1, msgid2, n):
try:
- t = translation(domain, _localedirs.get(domain, None))
+ t = translation(domain, _localedirs.get(domain, None),
+ codeset=_localecodesets.get(domain))
except IOError:
if n == 1:
return msgid1
@@ -467,14 +538,28 @@ def dngettext(domain, msgid1, msgid2, n):
return msgid2
return t.ngettext(msgid1, msgid2, n)
+def ldngettext(domain, msgid1, msgid2, n):
+ try:
+ t = translation(domain, _localedirs.get(domain, None),
+ codeset=_localecodesets.get(domain))
+ except IOError:
+ if n == 1:
+ return msgid1
+ else:
+ return msgid2
+ return t.lngettext(msgid1, msgid2, n)
def gettext(message):
return dgettext(_current_domain, message)
+def lgettext(message):
+ return ldgettext(_current_domain, message)
def ngettext(msgid1, msgid2, n):
return dngettext(_current_domain, msgid1, msgid2, n)
+def lngettext(msgid1, msgid2, n):
+ return ldngettext(_current_domain, msgid1, msgid2, n)
# dcgettext() has been deemed unnecessary and is not implemented.
diff --git a/Misc/NEWS b/Misc/NEWS
index 6e8c710563..acb6f6245d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -33,6 +33,8 @@ Core and builtins
will cause a TypeError to be raised. This matches the behavior of
Jython.
+- Implemented bind_textdomain_codeset() in locale module.
+
Extension modules
-----------------
@@ -112,6 +114,12 @@ Library
- Bug #990307: when keep_empty_values is True, cgi.parse_qsl()
no longer returns spurious empty fields.
+- Implemented bind_textdomain_codeset() in gettext module.
+
+- Introduced in gettext module the l*gettext() family of functions,
+ which return translation strings encoded in the preferred encoding,
+ as reported by the locale module's getpreferredencoding().
+
Tools/Demos
-----------
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 0f8a71aadc..2d6541dedb 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -649,6 +649,24 @@ PyIntl_bindtextdomain(PyObject* self,PyObject*args)
return PyString_FromString(dirname);
}
+#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
+PyDoc_STRVAR(bind_textdomain_codeset__doc__,
+"bind_textdomain_codeset(domain, codeset) -> string\n"
+"Bind the C library's domain to codeset.");
+
+static PyObject*
+PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
+{
+ char *domain,*codeset;
+ if (!PyArg_ParseTuple(args, "sz", &domain, &codeset))
+ return NULL;
+ codeset = bind_textdomain_codeset(domain, codeset);
+ if (codeset)
+ return PyString_FromString(codeset);
+ Py_RETURN_NONE;
+}
+#endif
+
#endif
static struct PyMethodDef PyLocale_Methods[] = {
@@ -678,6 +696,10 @@ static struct PyMethodDef PyLocale_Methods[] = {
textdomain__doc__},
{"bindtextdomain",(PyCFunction)PyIntl_bindtextdomain,METH_VARARGS,
bindtextdomain__doc__},
+#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
+ {"bind_textdomain_codeset",(PyCFunction)PyIntl_bind_textdomain_codeset,
+ METH_VARARGS, bind_textdomain_codeset__doc__},
+#endif
#endif
{NULL, NULL}
};
diff --git a/configure.in b/configure.in
index 77522c35f6..f73d1ceac1 100644
--- a/configure.in
+++ b/configure.in
@@ -2044,8 +2044,8 @@ fi
AC_MSG_RESULT(MACHDEP_OBJS)
# checks for library functions
-AC_CHECK_FUNCS(alarm chown clock confstr ctermid execv \
- fork fpathconf ftime ftruncate \
+AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr ctermid \
+ execv fork fpathconf ftime ftruncate \
gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
getpriority getpwent getsid getwd \
kill killpg lchown lstat mkfifo mknod mktime \
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 4918ab7fca..5e1a43c4b8 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -37,6 +37,9 @@
/* Define this if your time.h defines altzone. */
#undef HAVE_ALTZONE
+/* Define to 1 if you have the `bind_textdomain_codeset' function. */
+#undef HAVE_BIND_TEXTDOMAIN_CODESET
+
/* Define to 1 if you have the <bluetooth/bluetooth.h> header file. */
#undef HAVE_BLUETOOTH_BLUETOOTH_H