Sync oslo imageutils, strutils to Ironic

Changes in imageutils:
===============================
9e88af1 fixed typos found by RETF rules
b455fac Enable decimal value input in imageutils.QemuImgInfo
bec3a5e Implements SI/IEC unit system conversion to bytes
8b2b0b7 Use hacking import_exceptions for gettextutils._
aad179d Fixing misspelled encryption key in QemuImgInfo
12bcdb7 Remove vim header
2bd46eb Refactors byte size extraction logic
0356685 Adds common image utils

Changes in strutils:
====================
cb5a804 Move `mask_password` to strutils
8a0f567 Remove str() from LOG.* and exceptions
fd18c28 Fix safe_encode(): return bytes on Python 3
302c7c8 strutils: Allow safe_{encode,decode} to take bytes as input
bec3a5e Implements SI/IEC unit system conversion to bytes
e53fe85 strutils bool_from_string, allow specified default
8b2b0b7 Use hacking import_exceptions for gettextutils._
84d461e Fix a bug in safe_encode where it returns a bytes object in py3
12bcdb7 Remove vim header

Change-Id: If0d16d70c004344511b9851c8a533759f0d2bb52
author: Zhongyue Luo <zhongyue.nah@intel.com> 2014-08-05 11:03:33 +0800
committer: Zhongyue Luo <zhongyue.nah@intel.com> 2014-08-05 12:57:50 +0800
commit: 5d4d79fbd1651a5285bf7334a71245b40585cef5 (patch)
tree: aa80c99d463902ae60d7fe4a83b8fe52cc8cbedd
parent: 797fdbead2d4cde2c4ddfb3b49a2f367dd9f0487 (diff)
download: ironic-5d4d79fbd1651a5285bf7334a71245b40585cef5.tar.gz
4 files changed, 274 insertions, 160 deletions
diff --git a/ironic/common/images.py b/ironic/common/images.py
index 51f6bea5a..8faac8131 100644
--- a/ironic/common/images.py
+++ b/ironic/common/images.py
@@ -20,7 +20,6 @@ Handling of VM disk images.
 """
 
 import os
-import re
 
 from oslo.config import cfg
 
@@ -28,8 +27,8 @@ from ironic.common import exception
 from ironic.common import image_service as service
 from ironic.common import utils
 from ironic.openstack.common import fileutils
+from ironic.openstack.common import imageutils
 from ironic.openstack.common import log as logging
-from ironic.openstack.common import strutils
 
 LOG = logging.getLogger(__name__)
 
@@ -43,129 +42,14 @@ CONF = cfg.CONF
 CONF.register_opts(image_opts)
 
 
-class QemuImgInfo(object):
-    BACKING_FILE_RE = re.compile((r"^(.*?)\s*\(actual\s+path\s*:"
-                                  r"\s+(.*?)\)\s*$"), re.I)
-    TOP_LEVEL_RE = re.compile(r"^([\w\d\s\_\-]+):(.*)$")
-    SIZE_RE = re.compile(r"\(\s*(\d+)\s+bytes\s*\)", re.I)
-
-    def __init__(self, cmd_output=None):
-        details = self._parse(cmd_output or '')
-        self.image = details.get('image')
-        self.backing_file = details.get('backing_file')
-        self.file_format = details.get('file_format')
-        self.virtual_size = details.get('virtual_size')
-        self.cluster_size = details.get('cluster_size')
-        self.disk_size = details.get('disk_size')
-        self.snapshots = details.get('snapshot_list', [])
-        self.encryption = details.get('encryption')
-
-    def __str__(self):
-        lines = [
-            'image: %s' % self.image,
-            'file_format: %s' % self.file_format,
-            'virtual_size: %s' % self.virtual_size,
-            'disk_size: %s' % self.disk_size,
-            'cluster_size: %s' % self.cluster_size,
-            'backing_file: %s' % self.backing_file,
-        ]
-        if self.snapshots:
-            lines.append("snapshots: %s" % self.snapshots)
-        return "\n".join(lines)
-
-    def _canonicalize(self, field):
-        # Standardize on underscores/lc/no dash and no spaces
-        # since qemu seems to have mixed outputs here... and
-        # this format allows for better integration with python
-        # - ie for usage in kwargs and such...
-        field = field.lower().strip()
-        return re.sub('[ -]', '_', field)
-
-    def _extract_bytes(self, details):
-        # Replace it with the byte amount
-        real_size = self.SIZE_RE.search(details)
-        if real_size:
-            details = real_size.group(1)
-        try:
-            details = strutils.to_bytes(details)
-        except (TypeError):
-            pass
-        return details
-
-    def _extract_details(self, root_cmd, root_details, lines_after):
-        real_details = root_details
-        if root_cmd == 'backing_file':
-            # Replace it with the real backing file
-            backing_match = self.BACKING_FILE_RE.match(root_details)
-            if backing_match:
-                real_details = backing_match.group(2).strip()
-        elif root_cmd in ['virtual_size', 'cluster_size', 'disk_size']:
-            # Replace it with the byte amount (if we can convert it)
-            real_details = self._extract_bytes(root_details)
-        elif root_cmd == 'file_format':
-            real_details = real_details.strip().lower()
-        elif root_cmd == 'snapshot_list':
-            # Next line should be a header, starting with 'ID'
-            if not lines_after or not lines_after[0].startswith("ID"):
-                msg = _("Snapshot list encountered but no header found!")
-                raise ValueError(msg)
-            del lines_after[0]
-            real_details = []
-            # This is the sprintf pattern we will try to match
-            # "%-10s%-20s%7s%20s%15s"
-            # ID TAG VM SIZE DATE VM CLOCK (current header)
-            while lines_after:
-                line = lines_after[0]
-                line_pieces = line.split()
-                if len(line_pieces) != 6:
-                    break
-                # Check against this pattern in the final position
-                # "%02d:%02d:%02d.%03d"
-                date_pieces = line_pieces[5].split(":")
-                if len(date_pieces) != 3:
-                    break
-                real_details.append({
-                    'id': line_pieces[0],
-                    'tag': line_pieces[1],
-                    'vm_size': line_pieces[2],
-                    'date': line_pieces[3],
-                    'vm_clock': line_pieces[4] + " " + line_pieces[5],
-                })
-                del lines_after[0]
-        return real_details
-
-    def _parse(self, cmd_output):
-        # Analysis done of qemu-img.c to figure out what is going on here
-        # Find all points start with some chars and then a ':' then a newline
-        # and then handle the results of those 'top level' items in a separate
-        # function.
-        #
-        # TODO(harlowja): newer versions might have a json output format
-        #                 we should switch to that whenever possible.
-        #                 see: http://bit.ly/XLJXDX
-        contents = {}
-        lines = [x for x in cmd_output.splitlines() if x.strip()]
-        while lines:
-            line = lines.pop(0)
-            top_level = self.TOP_LEVEL_RE.match(line)
-            if top_level:
-                root = self._canonicalize(top_level.group(1))
-                if not root:
-                    continue
-                root_details = top_level.group(2).strip()
-                details = self._extract_details(root, root_details, lines)
-                contents[root] = details
-        return contents
-
-
 def qemu_img_info(path):
     """Return an object containing the parsed output from qemu-img info."""
     if not os.path.exists(path):
-        return QemuImgInfo()
+        return imageutils.QemuImgInfo()
 
     out, err = utils.execute('env', 'LC_ALL=C', 'LANG=C',
                              'qemu-img', 'info', path)
-    return QemuImgInfo(out)
+    return imageutils.QemuImgInfo(out)
 
 
 def convert_image(source, dest, out_format, run_as_root=False):
diff --git a/ironic/openstack/common/imageutils.py b/ironic/openstack/common/imageutils.py
new file mode 100644
index 000000000..e157eef00
--- /dev/null
+++ b/ironic/openstack/common/imageutils.py
@@ -0,0 +1,152 @@
+# Copyright 2010 United States Government as represented by the
+# Administrator of the National Aeronautics and Space Administration.
+# All Rights Reserved.
+# Copyright (c) 2010 Citrix Systems, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+"""
+Helper methods to deal with images.
+"""
+
+import re
+
+from ironic.openstack.common.gettextutils import _
+from ironic.openstack.common import strutils
+
+
+class QemuImgInfo(object):
+    BACKING_FILE_RE = re.compile((r"^(.*?)\s*\(actual\s+path\s*:"
+                                  r"\s+(.*?)\)\s*$"), re.I)
+    TOP_LEVEL_RE = re.compile(r"^([\w\d\s\_\-]+):(.*)$")
+    SIZE_RE = re.compile(r"(\d*\.?\d+)(\w+)?(\s*\(\s*(\d+)\s+bytes\s*\))?",
+                         re.I)
+
+    def __init__(self, cmd_output=None):
+        details = self._parse(cmd_output or '')
+        self.image = details.get('image')
+        self.backing_file = details.get('backing_file')
+        self.file_format = details.get('file_format')
+        self.virtual_size = details.get('virtual_size')
+        self.cluster_size = details.get('cluster_size')
+        self.disk_size = details.get('disk_size')
+        self.snapshots = details.get('snapshot_list', [])
+        self.encrypted = details.get('encrypted')
+
+    def __str__(self):
+        lines = [
+            'image: %s' % self.image,
+            'file_format: %s' % self.file_format,
+            'virtual_size: %s' % self.virtual_size,
+            'disk_size: %s' % self.disk_size,
+            'cluster_size: %s' % self.cluster_size,
+            'backing_file: %s' % self.backing_file,
+        ]
+        if self.snapshots:
+            lines.append("snapshots: %s" % self.snapshots)
+        if self.encrypted:
+            lines.append("encrypted: %s" % self.encrypted)
+        return "\n".join(lines)
+
+    def _canonicalize(self, field):
+        # Standardize on underscores/lc/no dash and no spaces
+        # since qemu seems to have mixed outputs here... and
+        # this format allows for better integration with python
+        # - i.e. for usage in kwargs and such...
+        field = field.lower().strip()
+        for c in (" ", "-"):
+            field = field.replace(c, '_')
+        return field
+
+    def _extract_bytes(self, details):
+        # Replace it with the byte amount
+        real_size = self.SIZE_RE.search(details)
+        if not real_size:
+            raise ValueError(_('Invalid input value "%s".') % details)
+        magnitude = real_size.group(1)
+        unit_of_measure = real_size.group(2)
+        bytes_info = real_size.group(3)
+        if bytes_info:
+            return int(real_size.group(4))
+        elif not unit_of_measure:
+            return int(magnitude)
+        return strutils.string_to_bytes('%s%sB' % (magnitude, unit_of_measure),
+                                        return_int=True)
+
+    def _extract_details(self, root_cmd, root_details, lines_after):
+        real_details = root_details
+        if root_cmd == 'backing_file':
+            # Replace it with the real backing file
+            backing_match = self.BACKING_FILE_RE.match(root_details)
+            if backing_match:
+                real_details = backing_match.group(2).strip()
+        elif root_cmd in ['virtual_size', 'cluster_size', 'disk_size']:
+            # Replace it with the byte amount (if we can convert it)
+            if root_details == 'None':
+                real_details = 0
+            else:
+                real_details = self._extract_bytes(root_details)
+        elif root_cmd == 'file_format':
+            real_details = real_details.strip().lower()
+        elif root_cmd == 'snapshot_list':
+            # Next line should be a header, starting with 'ID'
+            if not lines_after or not lines_after[0].startswith("ID"):
+                msg = _("Snapshot list encountered but no header found!")
+                raise ValueError(msg)
+            del lines_after[0]
+            real_details = []
+            # This is the sprintf pattern we will try to match
+            # "%-10s%-20s%7s%20s%15s"
+            # ID TAG VM SIZE DATE VM CLOCK (current header)
+            while lines_after:
+                line = lines_after[0]
+                line_pieces = line.split()
+                if len(line_pieces) != 6:
+                    break
+                # Check against this pattern in the final position
+                # "%02d:%02d:%02d.%03d"
+                date_pieces = line_pieces[5].split(":")
+                if len(date_pieces) != 3:
+                    break
+                real_details.append({
+                    'id': line_pieces[0],
+                    'tag': line_pieces[1],
+                    'vm_size': line_pieces[2],
+                    'date': line_pieces[3],
+                    'vm_clock': line_pieces[4] + " " + line_pieces[5],
+                })
+                del lines_after[0]
+        return real_details
+
+    def _parse(self, cmd_output):
+        # Analysis done of qemu-img.c to figure out what is going on here
+        # Find all points start with some chars and then a ':' then a newline
+        # and then handle the results of those 'top level' items in a separate
+        # function.
+        #
+        # TODO(harlowja): newer versions might have a json output format
+        #                 we should switch to that whenever possible.
+        #                 see: http://bit.ly/XLJXDX
+        contents = {}
+        lines = [x for x in cmd_output.splitlines() if x.strip()]
+        while lines:
+            line = lines.pop(0)
+            top_level = self.TOP_LEVEL_RE.match(line)
+            if top_level:
+                root = self._canonicalize(top_level.group(1))
+                if not root:
+                    continue
+                root_details = top_level.group(2).strip()
+                details = self._extract_details(root, root_details, lines)
+                contents[root] = details
+        return contents
diff --git a/ironic/openstack/common/strutils.py b/ironic/openstack/common/strutils.py
index e09244ad2..3f509b222 100644
--- a/ironic/openstack/common/strutils.py
+++ b/ironic/openstack/common/strutils.py
@@ -1,5 +1,3 @@
-# vim: tabstop=4 shiftwidth=4 softtabstop=4
-
 # Copyright 2011 OpenStack Foundation.
 # All Rights Reserved.
 #
@@ -19,25 +17,31 @@
 System-level utilities and helper functions.
 """
 
+import math
 import re
 import sys
 import unicodedata
 
 import six
 
-from ironic.openstack.common.gettextutils import _  # noqa
+from ironic.openstack.common.gettextutils import _
 
 
-# Used for looking up extensions of text
-# to their 'multiplied' byte amount
-BYTE_MULTIPLIERS = {
-    '': 1,
-    't': 1024 ** 4,
-    'g': 1024 ** 3,
-    'm': 1024 ** 2,
-    'k': 1024,
+UNIT_PREFIX_EXPONENT = {
+    'k': 1,
+    'K': 1,
+    'Ki': 1,
+    'M': 2,
+    'Mi': 2,
+    'G': 3,
+    'Gi': 3,
+    'T': 4,
+    'Ti': 4,
+}
+UNIT_SYSTEM_INFO = {
+    'IEC': (1024, re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$')),
+    'SI': (1000, re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$')),
 }
-BYTE_REGEX = re.compile(r'(^-?\d+)(\D*)')
 
 TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes')
 FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no')
@@ -46,6 +50,28 @@ SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]")
 SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+")
 
 
+# NOTE(flaper87): The following 3 globals are used by `mask_password`
+_SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password']
+
+# NOTE(ldbragst): Let's build a list of regex objects using the list of
+# _SANITIZE_KEYS we already have. This way, we only have to add the new key
+# to the list of _SANITIZE_KEYS and we can generate regular expressions
+# for XML and JSON automatically.
+_SANITIZE_PATTERNS = []
+_FORMAT_PATTERNS = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
+                    r'(<%(key)s>).*?(</%(key)s>)',
+                    r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
+                    r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
+                    r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?[\'"])'
+                    '.*?([\'"])',
+                    r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']
+
+for key in _SANITIZE_KEYS:
+    for pattern in _FORMAT_PATTERNS:
+        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
+        _SANITIZE_PATTERNS.append(reg_ex)
+
+
 def int_from_bool_as_string(subject):
     """Interpret a string as a boolean and return either 1 or 0.
 
@@ -60,12 +86,12 @@ def int_from_bool_as_string(subject):
     return bool_from_string(subject) and 1 or 0
 
 
-def bool_from_string(subject, strict=False):
+def bool_from_string(subject, strict=False, default=False):
     """Interpret a string as a boolean.
 
     A case-insensitive match is performed such that strings matching 't',
     'true', 'on', 'y', 'yes', or '1' are considered True and, when
-    `strict=False`, anything else is considered False.
+    `strict=False`, anything else returns the value specified by 'default'.
 
     Useful for JSON-decoded stuff and config file parsing.
 
@@ -74,7 +100,7 @@ def bool_from_string(subject, strict=False):
     Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'.
     """
     if not isinstance(subject, six.string_types):
-        subject = str(subject)
+        subject = six.text_type(subject)
 
     lowered = subject.strip().lower()
 
@@ -90,11 +116,12 @@ def bool_from_string(subject, strict=False):
                                       'acceptable': acceptable}
         raise ValueError(msg)
     else:
-        return False
+        return default
 
 
 def safe_decode(text, incoming=None, errors='strict'):
-    """Decodes incoming str using `incoming` if they're not already unicode.
+    """Decodes incoming text/bytes string using `incoming` if they're not
+       already unicode.
 
     :param incoming: Text's current encoding
     :param errors: Errors handling policy. See here for valid
@@ -103,7 +130,7 @@ def safe_decode(text, incoming=None, errors='strict'):
                 representation of it.
     :raises TypeError: If text is not an instance of str
     """
-    if not isinstance(text, six.string_types):
+    if not isinstance(text, (six.string_types, six.binary_type)):
         raise TypeError("%s can't be decoded" % type(text))
 
     if isinstance(text, six.text_type):
@@ -133,7 +160,7 @@ def safe_decode(text, incoming=None, errors='strict'):
 
 def safe_encode(text, incoming=None,
                 encoding='utf-8', errors='strict'):
-    """Encodes incoming str/unicode using `encoding`.
+    """Encodes incoming text/bytes string using `encoding`.
 
     If incoming is not specified, text is expected to be encoded with
     current python's default encoding. (`sys.getdefaultencoding`)
@@ -146,7 +173,7 @@ def safe_encode(text, incoming=None,
                 representation of it.
     :raises TypeError: If text is not an instance of str
     """
-    if not isinstance(text, six.string_types):
+    if not isinstance(text, (six.string_types, six.binary_type)):
         raise TypeError("%s can't be encoded" % type(text))
 
     if not incoming:
@@ -159,38 +186,54 @@ def safe_encode(text, incoming=None,
         # Decode text before encoding it with `encoding`
         text = safe_decode(text, incoming, errors)
         return text.encode(encoding, errors)
+    else:
+        return text
+
+
+def string_to_bytes(text, unit_system='IEC', return_int=False):
+    """Converts a string into an float representation of bytes.
 
-    return text
+    The units supported for IEC ::
 
+        Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it)
+        KB, KiB, MB, MiB, GB, GiB, TB, TiB
 
-def to_bytes(text, default=0):
-    """Converts a string into an integer of bytes.
+    The units supported for SI ::
 
-    Looks at the last characters of the text to determine
-    what conversion is needed to turn the input text into a byte number.
-    Supports "B, K(B), M(B), G(B), and T(B)". (case insensitive)
+        kb(it), Mb(it), Gb(it), Tb(it)
+        kB, MB, GB, TB
+
+    Note that the SI unit system does not support capital letter 'K'
 
     :param text: String input for bytes size conversion.
-    :param default: Default return value when text is blank.
+    :param unit_system: Unit system for byte size conversion.
+    :param return_int: If True, returns integer representation of text
+                       in bytes. (default: decimal)
+    :returns: Numerical representation of text in bytes.
+    :raises ValueError: If text has an invalid value.
 
     """
-    match = BYTE_REGEX.search(text)
+    try:
+        base, reg_ex = UNIT_SYSTEM_INFO[unit_system]
+    except KeyError:
+        msg = _('Invalid unit system: "%s"') % unit_system
+        raise ValueError(msg)
+    match = reg_ex.match(text)
     if match:
-        magnitude = int(match.group(1))
-        mult_key_org = match.group(2)
-        if not mult_key_org:
-            return magnitude
-    elif text:
+        magnitude = float(match.group(1))
+        unit_prefix = match.group(2)
+        if match.group(3) in ['b', 'bit']:
+            magnitude /= 8
+    else:
         msg = _('Invalid string format: %s') % text
-        raise TypeError(msg)
+        raise ValueError(msg)
+    if not unit_prefix:
+        res = magnitude
     else:
-        return default
-    mult_key = mult_key_org.lower().replace('b', '', 1)
-    multiplier = BYTE_MULTIPLIERS.get(mult_key)
-    if multiplier is None:
-        msg = _('Unknown byte multiplier: %s') % mult_key_org
-        raise TypeError(msg)
-    return magnitude * multiplier
+        res = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix])
+    if return_int:
+        return int(math.ceil(res))
+    return res
 
 
 def to_slug(value, incoming=None, errors="strict"):
@@ -216,3 +259,37 @@ def to_slug(value, incoming=None, errors="strict"):
         "ascii", "ignore").decode("ascii")
     value = SLUGIFY_STRIP_RE.sub("", value).strip().lower()
     return SLUGIFY_HYPHENATE_RE.sub("-", value)
+
+
+def mask_password(message, secret="***"):
+    """Replace password with 'secret' in message.
+
+    :param message: The string which includes security information.
+    :param secret: value with which to replace passwords.
+    :returns: The unicode value of message with the password fields masked.
+
+    For example:
+
+    >>> mask_password("'adminPass' : 'aaaaa'")
+    "'adminPass' : '***'"
+    >>> mask_password("'admin_pass' : 'aaaaa'")
+    "'admin_pass' : '***'"
+    >>> mask_password('"password" : "aaaaa"')
+    '"password" : "***"'
+    >>> mask_password("'original_password' : 'aaaaa'")
+    "'original_password' : '***'"
+    >>> mask_password("u'original_password' :   u'aaaaa'")
+    "u'original_password' :   u'***'"
+    """
+    message = six.text_type(message)
+
+    # NOTE(ldbragst): Check to see if anything in message contains any key
+    # specified in _SANITIZE_KEYS, if not then just return the message since
+    # we don't have to mask any passwords.
+    if not any(key in message for key in _SANITIZE_KEYS):
+        return message
+
+    secret = r'\g<1>' + secret + r'\g<2>'
+    for pattern in _SANITIZE_PATTERNS:
+        message = re.sub(pattern, secret, message)
+    return message
diff --git a/openstack-common.conf b/openstack-common.conf
index 03d913a64..4562c7dc0 100644
--- a/openstack-common.conf
+++ b/openstack-common.conf
@@ -8,6 +8,7 @@ module=eventlet_backdoor
 module=excutils
 module=fileutils
 module=gettextutils
+module=imageutils
 module=importutils
 module=jsonutils
 module=local
author	Zhongyue Luo <zhongyue.nah@intel.com>	2014-08-05 11:03:33 +0800
committer	Zhongyue Luo <zhongyue.nah@intel.com>	2014-08-05 12:57:50 +0800
commit	5d4d79fbd1651a5285bf7334a71245b40585cef5 (patch)
tree	aa80c99d463902ae60d7fe4a83b8fe52cc8cbedd
parent	797fdbead2d4cde2c4ddfb3b49a2f367dd9f0487 (diff)
download	ironic-5d4d79fbd1651a5285bf7334a71245b40585cef5.tar.gz