diff options
-rw-r--r-- | heatclient/openstack/common/__init__.py | 2 | ||||
-rw-r--r-- | heatclient/openstack/common/gettextutils.py | 495 | ||||
-rw-r--r-- | heatclient/openstack/common/strutils.py | 97 |
3 files changed, 360 insertions, 234 deletions
diff --git a/heatclient/openstack/common/__init__.py b/heatclient/openstack/common/__init__.py index e69de29..2a00f3b 100644 --- a/heatclient/openstack/common/__init__.py +++ b/heatclient/openstack/common/__init__.py @@ -0,0 +1,2 @@ +import six +six.add_move(six.MovedModule('mox', 'mox', 'mox3.mox')) diff --git a/heatclient/openstack/common/gettextutils.py b/heatclient/openstack/common/gettextutils.py index 1b3ede9..1a49c9d 100644 --- a/heatclient/openstack/common/gettextutils.py +++ b/heatclient/openstack/common/gettextutils.py @@ -1,5 +1,3 @@ -# vim: tabstop=4 shiftwidth=4 softtabstop=4 - # Copyright 2012 Red Hat, Inc. # Copyright 2013 IBM Corp. # All Rights Reserved. @@ -25,14 +23,12 @@ Usual usage in an openstack.common module: """ import copy +import functools import gettext -import logging +import locale +from logging import handlers import os import re -try: - import UserString as _userString -except ImportError: - import collections as _userString from babel import localedata import six @@ -40,6 +36,17 @@ import six _localedir = os.environ.get('heatclient'.upper() + '_LOCALEDIR') _t = gettext.translation('heatclient', localedir=_localedir, fallback=True) +# We use separate translation catalogs for each log level, so set up a +# mapping between the log level name and the translator. The domain +# for the log level is project_name + "-log-" + log_level so messages +# for each level end up in their own catalog. +_t_log_levels = dict( + (level, gettext.translation('heatclient' + '-log-' + level, + localedir=_localedir, + fallback=True)) + for level in ['info', 'warning', 'error', 'critical'] +) + _AVAILABLE_LANGUAGES = {} USE_LAZY = False @@ -58,13 +65,35 @@ def enable_lazy(): def _(msg): if USE_LAZY: - return Message(msg, 'heatclient') + return Message(msg, domain='heatclient') else: if six.PY3: return _t.gettext(msg) return _t.ugettext(msg) +def _log_translation(msg, level): + """Build a single translation of a log message + """ + if USE_LAZY: + return Message(msg, domain='heatclient' + '-log-' + level) + else: + translator = _t_log_levels[level] + if six.PY3: + return translator.gettext(msg) + return translator.ugettext(msg) + +# Translators for log levels. +# +# The abbreviated names are meant to reflect the usual use of a short +# name like '_'. The "L" is for "log" and the other letter comes from +# the level. +_LI = functools.partial(_log_translation, level='info') +_LW = functools.partial(_log_translation, level='warning') +_LE = functools.partial(_log_translation, level='error') +_LC = functools.partial(_log_translation, level='critical') + + def install(domain, lazy=False): """Install a _() function using the given translation domain. @@ -90,11 +119,6 @@ def install(domain, lazy=False): # messages in OpenStack. We override the standard _() function # and % (format string) operation to build Message objects that can # later be translated when we have more information. - # - # Also included below is an example LocaleHandler that translates - # Messages to an associated locale, effectively allowing many logs, - # each with their own locale. - def _lazy_gettext(msg): """Create and return a Message object. @@ -105,7 +129,7 @@ def install(domain, lazy=False): Message encapsulates a string so that we can translate it later when needed. """ - return Message(msg, domain) + return Message(msg, domain=domain) from six import moves moves.builtins.__dict__['_'] = _lazy_gettext @@ -120,182 +144,169 @@ def install(domain, lazy=False): unicode=True) -class Message(_userString.UserString, object): - """Class used to encapsulate translatable messages.""" - def __init__(self, msg, domain): - # _msg is the gettext msgid and should never change - self._msg = msg - self._left_extra_msg = '' - self._right_extra_msg = '' - self._locale = None - self.params = None - self.domain = domain - - @property - def data(self): - # NOTE(mrodden): this should always resolve to a unicode string - # that best represents the state of the message currently - - localedir = os.environ.get(self.domain.upper() + '_LOCALEDIR') - if self.locale: - lang = gettext.translation(self.domain, - localedir=localedir, - languages=[self.locale], - fallback=True) - else: - # use system locale for translations - lang = gettext.translation(self.domain, - localedir=localedir, - fallback=True) +class Message(six.text_type): + """A Message object is a unicode object that can be translated. + Translation of Message is done explicitly using the translate() method. + For all non-translation intents and purposes, a Message is simply unicode, + and can be treated as such. + """ + + def __new__(cls, msgid, msgtext=None, params=None, + domain='heatclient', *args): + """Create a new Message object. + + In order for translation to work gettext requires a message ID, this + msgid will be used as the base unicode text. It is also possible + for the msgid and the base unicode text to be different by passing + the msgtext parameter. + """ + # If the base msgtext is not given, we use the default translation + # of the msgid (which is in English) just in case the system locale is + # not English, so that the base text will be in that locale by default. + if not msgtext: + msgtext = Message._translate_msgid(msgid, domain) + # We want to initialize the parent unicode with the actual object that + # would have been plain unicode if 'Message' was not enabled. + msg = super(Message, cls).__new__(cls, msgtext) + msg.msgid = msgid + msg.domain = domain + msg.params = params + return msg + + def translate(self, desired_locale=None): + """Translate this message to the desired locale. + + :param desired_locale: The desired locale to translate the message to, + if no locale is provided the message will be + translated to the system's default locale. + + :returns: the translated message in unicode + """ + + translated_message = Message._translate_msgid(self.msgid, + self.domain, + desired_locale) + if self.params is None: + # No need for more translation + return translated_message + + # This Message object may have been formatted with one or more + # Message objects as substitution arguments, given either as a single + # argument, part of a tuple, or as one or more values in a dictionary. + # When translating this Message we need to translate those Messages too + translated_params = _translate_args(self.params, desired_locale) + + translated_message = translated_message % translated_params + + return translated_message + + @staticmethod + def _translate_msgid(msgid, domain, desired_locale=None): + if not desired_locale: + system_locale = locale.getdefaultlocale() + # If the system locale is not available to the runtime use English + if not system_locale[0]: + desired_locale = 'en_US' + else: + desired_locale = system_locale[0] + + locale_dir = os.environ.get(domain.upper() + '_LOCALEDIR') + lang = gettext.translation(domain, + localedir=locale_dir, + languages=[desired_locale], + fallback=True) if six.PY3: - ugettext = lang.gettext - else: - ugettext = lang.ugettext - - full_msg = (self._left_extra_msg + - ugettext(self._msg) + - self._right_extra_msg) - - if self.params is not None: - full_msg = full_msg % self.params - - return six.text_type(full_msg) - - @property - def locale(self): - return self._locale - - @locale.setter - def locale(self, value): - self._locale = value - if not self.params: - return - - # This Message object may have been constructed with one or more - # Message objects as substitution parameters, given as a single - # Message, or a tuple or Map containing some, so when setting the - # locale for this Message we need to set it for those Messages too. - if isinstance(self.params, Message): - self.params.locale = value - return - if isinstance(self.params, tuple): - for param in self.params: - if isinstance(param, Message): - param.locale = value - return - if isinstance(self.params, dict): - for param in self.params.values(): - if isinstance(param, Message): - param.locale = value - - def _save_dictionary_parameter(self, dict_param): - full_msg = self.data - # look for %(blah) fields in string; - # ignore %% and deal with the - # case where % is first character on the line - keys = re.findall('(?:[^%]|^)?%\((\w*)\)[a-z]', full_msg) - - # if we don't find any %(blah) blocks but have a %s - if not keys and re.findall('(?:[^%]|^)%[a-z]', full_msg): - # apparently the full dictionary is the parameter - params = copy.deepcopy(dict_param) + translator = lang.gettext else: - params = {} - for key in keys: - try: - params[key] = copy.deepcopy(dict_param[key]) - except TypeError: - # cast uncopyable thing to unicode string - params[key] = six.text_type(dict_param[key]) + translator = lang.ugettext - return params + translated_message = translator(msgid) + return translated_message - def _save_parameters(self, other): - # we check for None later to see if - # we actually have parameters to inject, - # so encapsulate if our parameter is actually None + def __mod__(self, other): + # When we mod a Message we want the actual operation to be performed + # by the parent class (i.e. unicode()), the only thing we do here is + # save the original msgid and the parameters in case of a translation + params = self._sanitize_mod_params(other) + unicode_mod = super(Message, self).__mod__(params) + modded = Message(self.msgid, + msgtext=unicode_mod, + params=params, + domain=self.domain) + return modded + + def _sanitize_mod_params(self, other): + """Sanitize the object being modded with this Message. + + - Add support for modding 'None' so translation supports it + - Trim the modded object, which can be a large dictionary, to only + those keys that would actually be used in a translation + - Snapshot the object being modded, in case the message is + translated, it will be used as it was when the Message was created + """ if other is None: - self.params = (other, ) + params = (other,) elif isinstance(other, dict): - self.params = self._save_dictionary_parameter(other) + params = self._trim_dictionary_parameters(other) else: - # fallback to casting to unicode, - # this will handle the problematic python code-like - # objects that cannot be deep-copied - try: - self.params = copy.deepcopy(other) - except TypeError: - self.params = six.text_type(other) - - return self - - # overrides to be more string-like - def __unicode__(self): - return self.data - - def __str__(self): - if six.PY3: - return self.__unicode__() - return self.data.encode('utf-8') + params = self._copy_param(other) + return params - def __getstate__(self): - to_copy = ['_msg', '_right_extra_msg', '_left_extra_msg', - 'domain', 'params', '_locale'] - new_dict = self.__dict__.fromkeys(to_copy) - for attr in to_copy: - new_dict[attr] = copy.deepcopy(self.__dict__[attr]) + def _trim_dictionary_parameters(self, dict_param): + """Return a dict that only has matching entries in the msgid.""" + # NOTE(luisg): Here we trim down the dictionary passed as parameters + # to avoid carrying a lot of unnecessary weight around in the message + # object, for example if someone passes in Message() % locals() but + # only some params are used, and additionally we prevent errors for + # non-deepcopyable objects by unicoding() them. + + # Look for %(param) keys in msgid; + # Skip %% and deal with the case where % is first character on the line + keys = re.findall('(?:[^%]|^)?%\((\w*)\)[a-z]', self.msgid) + + # If we don't find any %(param) keys but have a %s + if not keys and re.findall('(?:[^%]|^)%[a-z]', self.msgid): + # Apparently the full dictionary is the parameter + params = self._copy_param(dict_param) + else: + params = {} + # Save our existing parameters as defaults to protect + # ourselves from losing values if we are called through an + # (erroneous) chain that builds a valid Message with + # arguments, and then does something like "msg % kwds" + # where kwds is an empty dictionary. + src = {} + if isinstance(self.params, dict): + src.update(self.params) + src.update(dict_param) + for key in keys: + params[key] = self._copy_param(src[key]) - return new_dict + return params - def __setstate__(self, state): - for (k, v) in state.items(): - setattr(self, k, v) + def _copy_param(self, param): + try: + return copy.deepcopy(param) + except TypeError: + # Fallback to casting to unicode this will handle the + # python code-like objects that can't be deep-copied + return six.text_type(param) - # operator overloads def __add__(self, other): - copied = copy.deepcopy(self) - copied._right_extra_msg += other.__str__() - return copied + msg = _('Message objects do not support addition.') + raise TypeError(msg) def __radd__(self, other): - copied = copy.deepcopy(self) - copied._left_extra_msg += other.__str__() - return copied + return self.__add__(other) - def __mod__(self, other): - # do a format string to catch and raise - # any possible KeyErrors from missing parameters - self.data % other - copied = copy.deepcopy(self) - return copied._save_parameters(other) - - def __mul__(self, other): - return self.data * other - - def __rmul__(self, other): - return other * self.data - - def __getitem__(self, key): - return self.data[key] - - def __getslice__(self, start, end): - return self.data.__getslice__(start, end) - - def __getattribute__(self, name): - # NOTE(mrodden): handle lossy operations that we can't deal with yet - # These override the UserString implementation, since UserString - # uses our __class__ attribute to try and build a new message - # after running the inner data string through the operation. - # At that point, we have lost the gettext message id and can just - # safely resolve to a string instead. - ops = ['capitalize', 'center', 'decode', 'encode', - 'expandtabs', 'ljust', 'lstrip', 'replace', 'rjust', 'rstrip', - 'strip', 'swapcase', 'title', 'translate', 'upper', 'zfill'] - if name in ops: - return getattr(self.data, name) - else: - return _userString.UserString.__getattribute__(self, name) + def __str__(self): + # NOTE(luisg): Logging in python 2.6 tries to str() log records, + # and it expects specifically a UnicodeError in order to proceed. + msg = _('Message objects do not support str() because they may ' + 'contain non-ascii characters. ' + 'Please use unicode() or translate() instead.') + raise UnicodeError(msg) def get_available_languages(domain): @@ -321,53 +332,143 @@ def get_available_languages(domain): list_identifiers = (getattr(localedata, 'list', None) or getattr(localedata, 'locale_identifiers')) locale_identifiers = list_identifiers() + for i in locale_identifiers: if find(i) is not None: language_list.append(i) + + # NOTE(luisg): Babel>=1.0,<1.3 has a bug where some OpenStack supported + # locales (e.g. 'zh_CN', and 'zh_TW') aren't supported even though they + # are perfectly legitimate locales: + # https://github.com/mitsuhiko/babel/issues/37 + # In Babel 1.3 they fixed the bug and they support these locales, but + # they are still not explicitly "listed" by locale_identifiers(). + # That is why we add the locales here explicitly if necessary so that + # they are listed as supported. + aliases = {'zh': 'zh_CN', + 'zh_Hant_HK': 'zh_HK', + 'zh_Hant': 'zh_TW', + 'fil': 'tl_PH'} + for (locale, alias) in six.iteritems(aliases): + if locale in language_list and alias not in language_list: + language_list.append(alias) + _AVAILABLE_LANGUAGES[domain] = language_list return copy.copy(language_list) -def get_localized_message(message, user_locale): - """Gets a localized version of the given message in the given locale. +def translate(obj, desired_locale=None): + """Gets the translated unicode representation of the given object. - If the message is not a Message object the message is returned as-is. - If the locale is None the message is translated to the default locale. + If the object is not translatable it is returned as-is. + If the locale is None the object is translated to the system locale. - :returns: the translated message in unicode, or the original message if + :param obj: the object to translate + :param desired_locale: the locale to translate the message to, if None the + default system locale will be used + :returns: the translated object in unicode, or the original object if it could not be translated """ - translated = message + message = obj + if not isinstance(message, Message): + # If the object to translate is not already translatable, + # let's first get its unicode representation + message = six.text_type(obj) if isinstance(message, Message): - original_locale = message.locale - message.locale = user_locale - translated = six.text_type(message) - message.locale = original_locale - return translated + # Even after unicoding() we still need to check if we are + # running with translatable unicode before translating + return message.translate(desired_locale) + return obj + +def _translate_args(args, desired_locale=None): + """Translates all the translatable elements of the given arguments object. -class LocaleHandler(logging.Handler): - """Handler that can have a locale associated to translate Messages. + This method is used for translating the translatable values in method + arguments which include values of tuples or dictionaries. + If the object is not a tuple or a dictionary the object itself is + translated if it is translatable. - A quick example of how to utilize the Message class above. - LocaleHandler takes a locale and a target logging.Handler object - to forward LogRecord objects to after translating the internal Message. + If the locale is None the object is translated to the system locale. + + :param args: the args to translate + :param desired_locale: the locale to translate the args to, if None the + default system locale will be used + :returns: a new args object with the translated contents of the original """ + if isinstance(args, tuple): + return tuple(translate(v, desired_locale) for v in args) + if isinstance(args, dict): + translated_dict = {} + for (k, v) in six.iteritems(args): + translated_v = translate(v, desired_locale) + translated_dict[k] = translated_v + return translated_dict + return translate(args, desired_locale) + + +class TranslationHandler(handlers.MemoryHandler): + """Handler that translates records before logging them. + + The TranslationHandler takes a locale and a target logging.Handler object + to forward LogRecord objects to after translating them. This handler + depends on Message objects being logged, instead of regular strings. + + The handler can be configured declaratively in the logging.conf as follows: - def __init__(self, locale, target): - """Initialize a LocaleHandler + [handlers] + keys = translatedlog, translator + + [handler_translatedlog] + class = handlers.WatchedFileHandler + args = ('/var/log/api-localized.log',) + formatter = context + + [handler_translator] + class = openstack.common.log.TranslationHandler + target = translatedlog + args = ('zh_CN',) + + If the specified locale is not available in the system, the handler will + log in the default locale. + """ + + def __init__(self, locale=None, target=None): + """Initialize a TranslationHandler :param locale: locale to use for translating messages :param target: logging.Handler object to forward LogRecord objects to after translation """ - logging.Handler.__init__(self) + # NOTE(luisg): In order to allow this handler to be a wrapper for + # other handlers, such as a FileHandler, and still be able to + # configure it using logging.conf, this handler has to extend + # MemoryHandler because only the MemoryHandlers' logging.conf + # parsing is implemented such that it accepts a target handler. + handlers.MemoryHandler.__init__(self, capacity=0, target=target) self.locale = locale - self.target = target + + def setFormatter(self, fmt): + self.target.setFormatter(fmt) def emit(self, record): - if isinstance(record.msg, Message): - # set the locale and resolve to a string - record.msg.locale = self.locale + # We save the message from the original record to restore it + # after translation, so other handlers are not affected by this + original_msg = record.msg + original_args = record.args + + try: + self._translate_and_log_record(record) + finally: + record.msg = original_msg + record.args = original_args + + def _translate_and_log_record(self, record): + record.msg = translate(record.msg, self.locale) + + # In addition to translating the message, we also need to translate + # arguments that were passed to the log method that were not part + # of the main message e.g., log.info(_('Some message %s'), this_one)) + record.args = _translate_args(record.args, self.locale) self.target.emit(record) diff --git a/heatclient/openstack/common/strutils.py b/heatclient/openstack/common/strutils.py index dd152b3..d6aef26 100644 --- a/heatclient/openstack/common/strutils.py +++ b/heatclient/openstack/common/strutils.py @@ -17,25 +17,31 @@ System-level utilities and helper functions. """ +import math import re import sys import unicodedata import six -from heatclient.openstack.common.gettextutils import _ # noqa +from heatclient.openstack.common.gettextutils import _ -# Used for looking up extensions of text -# to their 'multiplied' byte amount -BYTE_MULTIPLIERS = { - '': 1, - 't': 1024 ** 4, - 'g': 1024 ** 3, - 'm': 1024 ** 2, - 'k': 1024, +UNIT_PREFIX_EXPONENT = { + 'k': 1, + 'K': 1, + 'Ki': 1, + 'M': 2, + 'Mi': 2, + 'G': 3, + 'Gi': 3, + 'T': 4, + 'Ti': 4, +} +UNIT_SYSTEM_INFO = { + 'IEC': (1024, re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$')), + 'SI': (1000, re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$')), } -BYTE_REGEX = re.compile(r'(^-?\d+)(\D*)') TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes') FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no') @@ -58,12 +64,12 @@ def int_from_bool_as_string(subject): return bool_from_string(subject) and 1 or 0 -def bool_from_string(subject, strict=False): +def bool_from_string(subject, strict=False, default=False): """Interpret a string as a boolean. A case-insensitive match is performed such that strings matching 't', 'true', 'on', 'y', 'yes', or '1' are considered True and, when - `strict=False`, anything else is considered False. + `strict=False`, anything else returns the value specified by 'default'. Useful for JSON-decoded stuff and config file parsing. @@ -88,11 +94,12 @@ def bool_from_string(subject, strict=False): 'acceptable': acceptable} raise ValueError(msg) else: - return False + return default def safe_decode(text, incoming=None, errors='strict'): - """Decodes incoming str using `incoming` if they're not already unicode. + """Decodes incoming text/bytes string using `incoming` if they're not + already unicode. :param incoming: Text's current encoding :param errors: Errors handling policy. See here for valid @@ -101,7 +108,7 @@ def safe_decode(text, incoming=None, errors='strict'): representation of it. :raises TypeError: If text is not an instance of str """ - if not isinstance(text, six.string_types): + if not isinstance(text, (six.string_types, six.binary_type)): raise TypeError("%s can't be decoded" % type(text)) if isinstance(text, six.text_type): @@ -131,7 +138,7 @@ def safe_decode(text, incoming=None, errors='strict'): def safe_encode(text, incoming=None, encoding='utf-8', errors='strict'): - """Encodes incoming str/unicode using `encoding`. + """Encodes incoming text/bytes string using `encoding`. If incoming is not specified, text is expected to be encoded with current python's default encoding. (`sys.getdefaultencoding`) @@ -144,7 +151,7 @@ def safe_encode(text, incoming=None, representation of it. :raises TypeError: If text is not an instance of str """ - if not isinstance(text, six.string_types): + if not isinstance(text, (six.string_types, six.binary_type)): raise TypeError("%s can't be encoded" % type(text)) if not incoming: @@ -167,34 +174,50 @@ def safe_encode(text, incoming=None, return text -def to_bytes(text, default=0): - """Converts a string into an integer of bytes. +def string_to_bytes(text, unit_system='IEC', return_int=False): + """Converts a string into an float representation of bytes. + + The units supported for IEC :: + + Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it) + KB, KiB, MB, MiB, GB, GiB, TB, TiB + + The units supported for SI :: - Looks at the last characters of the text to determine - what conversion is needed to turn the input text into a byte number. - Supports "B, K(B), M(B), G(B), and T(B)". (case insensitive) + kb(it), Mb(it), Gb(it), Tb(it) + kB, MB, GB, TB + + Note that the SI unit system does not support capital letter 'K' :param text: String input for bytes size conversion. - :param default: Default return value when text is blank. + :param unit_system: Unit system for byte size conversion. + :param return_int: If True, returns integer representation of text + in bytes. (default: decimal) + :returns: Numerical representation of text in bytes. + :raises ValueError: If text has an invalid value. """ - match = BYTE_REGEX.search(text) + try: + base, reg_ex = UNIT_SYSTEM_INFO[unit_system] + except KeyError: + msg = _('Invalid unit system: "%s"') % unit_system + raise ValueError(msg) + match = reg_ex.match(text) if match: - magnitude = int(match.group(1)) - mult_key_org = match.group(2) - if not mult_key_org: - return magnitude - elif text: + magnitude = float(match.group(1)) + unit_prefix = match.group(2) + if match.group(3) in ['b', 'bit']: + magnitude /= 8 + else: msg = _('Invalid string format: %s') % text - raise TypeError(msg) + raise ValueError(msg) + if not unit_prefix: + res = magnitude else: - return default - mult_key = mult_key_org.lower().replace('b', '', 1) - multiplier = BYTE_MULTIPLIERS.get(mult_key) - if multiplier is None: - msg = _('Unknown byte multiplier: %s') % mult_key_org - raise TypeError(msg) - return magnitude * multiplier + res = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix]) + if return_int: + return int(math.ceil(res)) + return res def to_slug(value, incoming=None, errors="strict"): |