# Copyright 2015 Tesora Inc. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import abc import ast import csv import configparser import io import re import sys import xmltodict import yaml from oslo_serialization import base64 from oslo_serialization import jsonutils from oslo_utils import strutils from trove.common import utils as trove_utils class StringConverter(object): """A passthrough string-to-object converter. """ def __init__(self, object_mappings): """ :param object_mappings: string-to-object mappings :type object_mappings: dict """ self._object_mappings = object_mappings def to_strings(self, items): """Recursively convert collection items to strings. :returns: Copy of the input collection with all items converted. """ if trove_utils.is_collection(items): return map(self.to_strings, items) return self._to_string(items) def to_objects(self, items): """Recursively convert collection string to objects. :returns: Copy of the input collection with all items converted. """ if trove_utils.is_collection(items): return map(self.to_objects, items) return self._to_object(items) def _to_string(self, value): for k, v in self._object_mappings.items(): if v is value: return k return str(value) def _to_object(self, value): # Return known mappings and quoted strings right away. 
if value in self._object_mappings: return self._object_mappings[value] elif (isinstance(value, str) and re.match("^'(.*)'|\"(.*)\"$", value)): return value try: return ast.literal_eval(value) except Exception: return value class StreamCodec(object, metaclass=abc.ABCMeta): @abc.abstractmethod def serialize(self, data): """Serialize a Python object into a stream. """ @abc.abstractmethod def deserialize(self, stream): """Deserialize stream data into a Python structure. """ class IdentityCodec(StreamCodec): """ A basic passthrough codec. Does not modify the data in any way. """ def serialize(self, data): return data def deserialize(self, stream): return stream class YamlCodec(StreamCodec): """ Read/write data from/into a YAML config file. a: 1 b: {c: 3, d: 4} ... The above file content (flow-style) would be represented as: {'a': 1, 'b': {'c': 3, 'd': 4,} ... } """ def __init__(self, default_flow_style=False): """ :param default_flow_style: Use flow-style (inline) formatting of nested collections. :type default_flow_style: boolean """ self._default_flow_style = default_flow_style def serialize(self, dict_data): return yaml.dump(dict_data, Dumper=self.dumper, default_flow_style=self._default_flow_style) def deserialize(self, stream): return yaml.load(stream, Loader=self.loader) @property def loader(self): return yaml.loader.Loader @property def dumper(self): return yaml.dumper.Dumper class SafeYamlCodec(YamlCodec): """ Same as YamlCodec except that it uses safe Loader and Dumper which encode Unicode strings and produce only basic YAML tags. """ def __init__(self, default_flow_style=False): super(SafeYamlCodec, self).__init__( default_flow_style=default_flow_style) @property def loader(self): return yaml.loader.SafeLoader @property def dumper(self): return yaml.dumper.SafeDumper class IniCodec(StreamCodec): """ Read/write data from/into an ini-style config file. [section_1] key = value key = value ... [section_2] key = value key = value ... 
The above file content would be represented as: {'section_1': {'key': value, 'key': value, ...}, 'section_2': {'key': value, 'key': value, ...} ... } """ def __init__(self, default_value=None, comment_markers=('#', ';')): """ :param default_value: Default value for keys with no value. If set, all keys are written as 'key = value'. The key is written without trailing '=' if None. :type default_value: object """ self._default_value = default_value self._comment_markers = comment_markers def serialize(self, dict_data): parser = self._init_config_parser(dict_data) output = io.StringIO() parser.write(output) return output.getvalue() def deserialize(self, stream): parser = self._init_config_parser() parser.read_file(self._pre_parse(stream)) return {s: {k: StringConverter({None: self._default_value}).to_objects(v) for k, v in parser.items(s, raw=True)} for s in parser.sections()} def _pre_parse(self, stream): buf = io.StringIO() for line in io.StringIO(stream): # Ignore commented lines. if not line.startswith(self._comment_markers): # Strip leading and trailing whitespaces from each line. buf.write(line.strip() + '\n') # Rewind the output buffer. buf.flush() buf.seek(0) return buf def _init_config_parser(self, sections=None): # SafeConfigParser was deprecated in Python 3.2 if sys.version_info >= (3, 2): parser = configparser.ConfigParser(allow_no_value=True) else: parser = configparser.SafeConfigParser(allow_no_value=True) if sections: for section in sections: parser.add_section(section) for key, value in sections[section].items(): str_val = StringConverter( {self._default_value: None}).to_strings(value) parser.set(section, key, str(str_val) if str_val is not None else str_val) return parser class PropertiesCodec(StreamCodec): """ Read/write data from/into a property-style config file. key1 k1arg1 k1arg2 ... k1argN key2 k2arg1 k2arg2 ... k2argN key3 k3arg1 k3arg2 ... key3 k3arg3 k3arg4 ... ... The above file content would be represented as: {'key1': [k1arg1, k1arg2 ... 
k1argN], 'key2': [k2arg1, k2arg2 ... k2argN] 'key3': [[k3arg1, k3arg2, ...], [k3arg3, k3arg4, ...]] ... } """ QUOTING_MODE = csv.QUOTE_MINIMAL STRICT_MODE = False SKIP_INIT_SPACE = True def __init__(self, delimiter=' ', comment_markers=('#'), unpack_singletons=True, string_mappings=None): """ :param delimiter: A one-character used to separate fields. :type delimiter: string :param empty_value: Value to represent None in the output. :type empty_value: object :param comment_markers: List of comment markers. :type comment_markers: list :param unpack_singletons: Whether to unpack singleton collections (collections with only a single item). :type unpack_singletons: boolean :param string_mappings: User-defined string representations of Python objects. :type string_mappings: dict """ self._delimiter = delimiter self._comment_markers = comment_markers self._string_converter = StringConverter(string_mappings or {}) self._unpack_singletons = unpack_singletons def serialize(self, dict_data): output = io.StringIO() writer = csv.writer(output, delimiter=self._delimiter, quoting=self.QUOTING_MODE, strict=self.STRICT_MODE, skipinitialspace=self.SKIP_INIT_SPACE) for key, value in dict_data.items(): writer.writerows(self._to_rows(key, value)) return output.getvalue() def deserialize(self, stream): reader = csv.reader(io.StringIO(stream), delimiter=self._delimiter, quoting=self.QUOTING_MODE, strict=self.STRICT_MODE, skipinitialspace=self.SKIP_INIT_SPACE) return self._to_dict(reader) def _to_dict(self, reader): data_dict = {} for row in reader: if row: key = row[0].strip() # Ignore comment lines. if not key.strip().startswith(self._comment_markers): # NOTE(zhaochao): a list object is expected for # trove_utils.unpack_singleton, however in python3 # map objects won't be treated as lists, so we # convert the result of StringConverter.to_objects # to a list explicitly. 
                    # Empty fields become None; in-line comments are removed
                    # from each argument before conversion.
                    items = list(self._string_converter.to_objects(
                        [v if v else None
                         for v in map(self._strip_comments, row[1:])]))
                    current = data_dict.get(key)
                    if current is not None:
                        # Repeated key: accumulate rows under the same key.
                        current.append(trove_utils.unpack_singleton(items)
                                       if self._unpack_singletons else items)
                    else:
                        data_dict.update({key: [items]})

        if self._unpack_singletons:
            # Unpack singleton values.
            # NOTE(zhaochao): In Python 3.x, dict.items() returns a view
            # object, which will reflect the changes of the dict itself:
            # https://docs.python.org/3/library/stdtypes.html#dict-views
            # This means as we're changing the dict, dict.items() cannot
            # guarantee we're safely iterating all entries in the dict.
            # Manually converting the result of dict.items() to a list will
            # fix.
            for k, v in list(data_dict.items()):
                data_dict.update({k: trove_utils.unpack_singleton(v)})

        return data_dict

    def _strip_comments(self, value):
        # Strip in-line comments: keep only the text before the first
        # occurrence of any comment marker.
        for marker in self._comment_markers:
            value = value.split(marker)[0]
        return value.strip()

    def _to_rows(self, header, items):
        # Convert one 'key: value(s)' entry into one or more CSV rows,
        # each row starting with the key (header).
        rows = []
        if trove_utils.is_collection(items):
            if any(trove_utils.is_collection(item) for item in items):
                # This is multi-row property.
                for item in items:
                    rows.extend(self._to_rows(header, item))
            else:
                # This is a single-row property with multiple arguments.
                rows.append(self._to_list(
                    header, self._string_converter.to_strings(items)))
        else:
            # This is a single-row property with only one argument.
            # Note(zhaochao): csv.writerows expects a list object before
            # python 3.5, but map objects won't be treated as lists in
            # python 3, so we explicitly convert the result of
            # StringConverter.to_strings to a list here to support py34
            # unittests.
            rows.append(
                list(self._string_converter.to_strings(
                    self._to_list(header, items))))

        return rows

    def _to_list(self, *items):
        # Flatten the given items into a single flat list.
        container = []
        for item in items:
            if trove_utils.is_collection(item):
                # This item is a nested collection - unpack it.
                container.extend(self._to_list(*item))
            else:
                # This item is not a collection - append it to the list.
                container.append(item)

        return container


class KeyValueCodec(StreamCodec):
    """
    Read/write data from/into a simple key=value file.

    key1=value1
    key2=value2
    key3=value3
    ...

    The above file content would be represented as:
    {'key1': 'value1',
     'key2': 'value2',
     'key3': 'value3',
     ...
    }
    """

    # Output casing for boolean values (see serialize_value).
    BOOL_PYTHON = 0  # True, False
    BOOL_LOWER = 1   # true, false
    BOOL_UPPER = 2   # TRUE, FALSE

    def __init__(self, delimiter='=',
                 comment_marker='#',
                 line_terminator='\r\n',
                 value_quoting=False,
                 value_quote_char="'",
                 bool_case=BOOL_PYTHON,
                 big_ints=False,
                 hidden_marker=None):
        """
        :param delimiter:         string placed between key and value
        :param comment_marker:    string indicating comment line in file
        :param line_terminator:   string placed between lines
        :param value_quoting:     whether or not to quote string values
        :param value_quote_char:  character used to quote string values
        :param bool_case:         BOOL_* setting case of bool values
        :param big_ints:          treat K/M/G at the end of ints as an int
        :param hidden_marker:     pattern prefixing hidden param
        """
        # NOTE(review): '_delimeter' is a historical misspelling kept for
        # compatibility with any external access to the attribute.
        self._delimeter = delimiter
        self._comment_marker = comment_marker
        self._line_terminator = line_terminator
        self._value_quoting = value_quoting
        self._value_quote_char = value_quote_char
        self._bool_case = bool_case
        self._big_ints = big_ints
        self._hidden_marker = hidden_marker

    def serialize(self, dict_data):
        lines = []
        for k, v in dict_data.items():
            lines.append(k + self._delimeter + self.serialize_value(v))

        return self._line_terminator.join(lines)

    def deserialize(self, stream):
        # Note(zhaochao): In Python 3, when files are opened in text mode,
        # newlines will be translated to '\n' by default, so we just split
        # the stream by '\n'.
if sys.version_info[0] >= 3: lines = stream.split('\n') else: lines = stream.split(self._line_terminator) result = {} for line in lines: line = line.lstrip().rstrip() if line == '' or line.startswith(self._comment_marker): continue k, v = re.split(re.escape(self._delimeter), line, 1) if self._value_quoting and v.startswith(self._value_quote_char): # remove trailing comments v = re.sub(r'%s *%s.*$' % ("'", '#'), '', v) v = v.lstrip( self._value_quote_char).rstrip( self._value_quote_char) elif v.lower() in ['true', 'false']: v = strutils.bool_from_string(v.lower()) else: # remove trailing comments v = re.sub('%s.*$' % self._comment_marker, '', v) if self._hidden_marker and v.startswith(self._hidden_marker): continue result[k.strip()] = v return result def serialize_value(self, value): if isinstance(value, bool): if self._bool_case == self.BOOL_PYTHON: value = str(value) elif self._bool_case == self.BOOL_LOWER: value = str(value).lower() elif self._bool_case == self.BOOL_UPPER: value = str(value).upper() if self.should_quote_value(value): value = self._value_quote_char + value + self._value_quote_char return str(value) def should_quote_value(self, value): if not self._value_quoting: return False if isinstance(value, bool) or isinstance(value, int): return False if value.lower() in ['true', 'false']: return False try: int(value) return False except ValueError: pass if self._big_ints and re.match(r'\d+[kKmMgGtTpP]', value): return False return True class JsonCodec(StreamCodec): def serialize(self, dict_data): return jsonutils.dumps(dict_data) def deserialize(self, stream): if type(stream) == str: return jsonutils.load(io.StringIO(stream)) if type(stream) == bytes: return jsonutils.load(io.BytesIO(stream)) class Base64Codec(StreamCodec): """Serialize (encode) and deserialize (decode) using the base64 codec. To read binary data from a file and b64encode it, used the decode=False flag on operating_system's read calls. 
    Use encode=False to decode binary data before writing to a file as well.
    """

    # NOTE(zhaochao): migrate to oslo_serialization.base64 to serialize(return
    # a text object) and deserialize(return a bytes object) data.

    def serialize(self, data):
        # Returns a text (str) object.
        return base64.encode_as_text(data)

    def deserialize(self, stream):
        # Returns a bytes object.
        return base64.decode_as_bytes(stream)


class XmlCodec(StreamCodec):
    """Read/write data from/into an XML document via xmltodict."""

    def __init__(self, encoding='utf-8'):
        # Character encoding used for both rendering and parsing.
        self._encoding = encoding

    def serialize(self, dict_data):
        # Pretty-printed XML returned as a string (output=None).
        return xmltodict.unparse(
            dict_data, output=None, encoding=self._encoding, pretty=True)

    def deserialize(self, stream):
        return xmltodict.parse(stream, encoding=self._encoding)