diff options
author | James Ennis <james.ennis@codethink.co.uk> | 2019-03-07 16:34:10 +0000 |
---|---|---|
committer | James Ennis <james.ennis@codethink.co.uk> | 2019-03-07 16:34:10 +0000 |
commit | 8ee336e948eb170fd9714bd060fc4f1e4628eeea (patch) | |
tree | 2884e9a6724a9e071a993607eca33f1bbca19199 | |
parent | 547d212c1d3bae5ef9d0e08b71091fe971a447cd (diff) | |
download | buildstream-8ee336e948eb170fd9714bd060fc4f1e4628eeea.tar.gz |
Add _yaml_roundtrip module to be used when we track or roundtrip dump
Currently, this module is just a copy of _yaml.py. We should strip it
of everything irrelevant.
-rw-r--r-- | buildstream/_projectrefs.py | 9 | ||||
-rw-r--r-- | buildstream/_stream.py | 5 | ||||
-rw-r--r-- | buildstream/_workspaces.py | 17 | ||||
-rw-r--r-- | buildstream/_yaml_roundtrip.py | 1055 | ||||
-rw-r--r-- | buildstream/element.py | 13 | ||||
-rw-r--r-- | buildstream/plugintestutils/runcli.py | 6 | ||||
-rw-r--r-- | buildstream/source.py | 5 |
7 files changed, 1091 insertions, 19 deletions
diff --git a/buildstream/_projectrefs.py b/buildstream/_projectrefs.py index 4009d7449..6edfa21bb 100644 --- a/buildstream/_projectrefs.py +++ b/buildstream/_projectrefs.py @@ -19,6 +19,7 @@ import os from . import _yaml +from . import _yaml_roundtrip from ._exceptions import LoadError, LoadErrorReason @@ -64,7 +65,7 @@ class ProjectRefs(): try: self._toplevel_node = _yaml.load(self._fullpath, shortname=self._base_name, copy_tree=True) - provenance = _yaml.node_get_provenance(self._toplevel_node) + provenance = _yaml_roundtrip.node_get_provenance(self._toplevel_node) self._toplevel_save = provenance.toplevel # Process any project options immediately @@ -72,7 +73,7 @@ class ProjectRefs(): # Run any final assertions on the project.refs, just incase there # are list composition directives or anything left unprocessed. - _yaml.node_final_assertions(self._toplevel_node) + _yaml_roundtrip.node_final_assertions(self._toplevel_node) except LoadError as e: if e.reason != LoadErrorReason.MISSING_FILE: @@ -83,7 +84,7 @@ class ProjectRefs(): self._toplevel_node = {} self._toplevel_save = self._toplevel_node - _yaml.node_validate(self._toplevel_node, ['projects']) + _yaml_roundtrip.node_validate(self._toplevel_node, ['projects']) # Ensure we create our toplevel entry point on the fly here for node in [self._toplevel_node, self._toplevel_save]: @@ -95,7 +96,7 @@ class ProjectRefs(): # Save the project.refs file with any local changes # def save(self): - _yaml.dump(self._toplevel_save, self._fullpath) + _yaml_roundtrip.dump(self._toplevel_save, self._fullpath) # lookup_ref() # diff --git a/buildstream/_stream.py b/buildstream/_stream.py index b0fce3817..72fe2cd8c 100644 --- a/buildstream/_stream.py +++ b/buildstream/_stream.py @@ -35,7 +35,7 @@ from ._message import Message, MessageType from ._scheduler import Scheduler, SchedStatus, TrackQueue, FetchQueue, BuildQueue, PullQueue, PushQueue from ._pipeline import Pipeline, PipelineSelection from ._profile import Topics, 
profile_start, profile_end -from . import utils, _yaml, _site +from . import utils, _yaml, _site, _yaml_roundtrip from . import Scope, Consistency @@ -812,7 +812,8 @@ class Stream(): } workspaces.append(workspace_detail) - _yaml.dump({ + # NOTE: Not sure about this one + _yaml_roundtrip.dump({ 'workspaces': workspaces }) diff --git a/buildstream/_workspaces.py b/buildstream/_workspaces.py index 24a3cc8d3..81d2f863f 100644 --- a/buildstream/_workspaces.py +++ b/buildstream/_workspaces.py @@ -20,6 +20,7 @@ import os from . import utils from . import _yaml +from . import _yaml_roundtrip from ._exceptions import LoadError, LoadErrorReason @@ -114,6 +115,7 @@ class WorkspaceProject(): def load(cls, directory): workspace_file = os.path.join(directory, WORKSPACE_PROJECT_FILE) if os.path.exists(workspace_file): + # NOTE: This probably doesn't need to be loaded with provenance either data_dict = _yaml.load(workspace_file) return cls.from_dict(directory, data_dict) else: @@ -125,7 +127,9 @@ class WorkspaceProject(): # def write(self): os.makedirs(self._directory, exist_ok=True) - _yaml.dump(self.to_dict(), self.get_filename()) + # This isn't even a roundtrip, it's just dumping a dict created in + # to_dict() to the file + _yaml_roundtrip.dump(self.to_dict(), self.get_filename()) # get_filename() # @@ -530,8 +534,11 @@ class Workspaces(): } } os.makedirs(self._bst_directory, exist_ok=True) - _yaml.dump(_yaml.node_sanitize(config), - self._get_filename()) + # NOTE: We're just dumping to the workspaces.yml file here, potentially we could + # just use ruamel.yaml.dump() here? 
I don't think this file is required for cache, + # key calc + _yaml_roundtrip.dump(_yaml.node_sanitize(config), + self._get_filename()) # _load_config() # @@ -545,6 +552,8 @@ class Workspaces(): def _load_config(self): workspace_file = self._get_filename() try: + # NOTE: Again, we're not expecting the user to be tweaking this file, and it doesn't + # make a difference to cache keys, surely we just load without provenance? node = _yaml.load(workspace_file) except LoadError as e: if e.reason == LoadErrorReason.MISSING_FILE: @@ -570,6 +579,7 @@ class Workspaces(): # Raises: LoadError if there was a problem with the workspace config # def _parse_workspace_config(self, workspaces): + # NOTE: As we're providing this with a default value, it won't fail. version = _yaml.node_get(workspaces, int, "format-version", default_value=0) if version == 0: @@ -624,6 +634,7 @@ class Workspaces(): def _load_workspace(self, node): dictionary = { 'prepared': _yaml.node_get(node, bool, 'prepared', default_value=False), + ## NOTE: WE MIGHT NEED PROVENANCE HERE.... 'path': _yaml.node_get(node, str, 'path'), 'last_successful': _yaml.node_get(node, str, 'last_successful', default_value=None), 'running_files': _yaml.node_get(node, dict, 'running_files', default_value=None), diff --git a/buildstream/_yaml_roundtrip.py b/buildstream/_yaml_roundtrip.py new file mode 100644 index 000000000..482e23bb3 --- /dev/null +++ b/buildstream/_yaml_roundtrip.py @@ -0,0 +1,1055 @@ +# +# Copyright (C) 2018 Codethink Limited +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> + +import sys +import collections +import string +from copy import deepcopy +from contextlib import ExitStack + +from ruamel import yaml +from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter +from ruamel.yaml.constructor import RoundTripConstructor +from ._exceptions import LoadError, LoadErrorReason + +# This overrides the ruamel constructor to treat everything as a string +RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:int', RoundTripConstructor.construct_yaml_str) +RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:float', RoundTripConstructor.construct_yaml_str) + +# We store information in the loaded yaml on a DictProvenance +# stored in all dictionaries under this key +PROVENANCE_KEY = '__bst_provenance_info' + + +# Provides information about file for provenance +# +# Args: +# name (str): Full path to the file +# shortname (str): Relative path to the file +# project (Project): Project where the shortname is relative from +class ProvenanceFile(): + def __init__(self, name, shortname, project): + self.name = name + self.shortname = shortname + self.project = project + + +# Provenance tracks the origin of a given node in the parsed dictionary. 
+# +# Args: +# node (dict, list, value): A binding to the originally parsed value +# filename (string): The filename the node was loaded from +# toplevel (dict): The toplevel of the loaded file, suitable for later dumps +# line (int): The line number where node was parsed +# col (int): The column number where node was parsed +# +class Provenance(): + def __init__(self, filename, node, toplevel, line=0, col=0): + self.filename = filename + self.node = node + self.toplevel = toplevel + self.line = line + self.col = col + + # Convert a Provenance to a string for error reporting + def __str__(self): + return "{} [line {:d} column {:d}]".format(self.filename.shortname, self.line, self.col) + + # Abstract method + def clone(self): + pass # pragma: nocover + + +# A Provenance for dictionaries, these are stored in the copy of the +# loaded YAML tree and track the provenance of all members +# +class DictProvenance(Provenance): + def __init__(self, filename, node, toplevel, line=None, col=None): + + if line is None or col is None: + # Special case for loading an empty dict + if hasattr(node, 'lc'): + line = node.lc.line + 1 + col = node.lc.col + else: + line = 1 + col = 0 + + super(DictProvenance, self).__init__(filename, node, toplevel, line=line, col=col) + + self.members = {} + + def clone(self): + provenance = DictProvenance(self.filename, self.node, self.toplevel, + line=self.line, col=self.col) + + provenance.members = { + member_name: member.clone() + for member_name, member in self.members.items() + } + return provenance + + +# A Provenance for dict members +# +class MemberProvenance(Provenance): + def __init__(self, filename, parent_dict, member_name, toplevel, + node=None, line=None, col=None): + + if parent_dict is not None: + node = parent_dict[member_name] + line, col = parent_dict.lc.value(member_name) + line += 1 + + super(MemberProvenance, self).__init__( + filename, node, toplevel, line=line, col=col) + + # Only used if member is a list + self.elements = [] 
+ + def clone(self): + provenance = MemberProvenance(self.filename, None, None, self.toplevel, + node=self.node, line=self.line, col=self.col) + provenance.elements = [e.clone() for e in self.elements] + return provenance + + +# A Provenance for list elements +# +class ElementProvenance(Provenance): + def __init__(self, filename, parent_list, index, toplevel, + node=None, line=None, col=None): + + if parent_list is not None: + node = parent_list[index] + line, col = parent_list.lc.item(index) + line += 1 + + super(ElementProvenance, self).__init__( + filename, node, toplevel, line=line, col=col) + + # Only used if element is a list + self.elements = [] + + def clone(self): + provenance = ElementProvenance(self.filename, None, None, self.toplevel, + node=self.node, line=self.line, col=self.col) + + provenance.elements = [e.clone for e in self.elements] + return provenance + + +# These exceptions are intended to be caught entirely within +# the BuildStream framework, hence they do not reside in the +# public exceptions.py +class CompositeError(Exception): + def __init__(self, path, message): + super(CompositeError, self).__init__(message) + self.path = path + + +class CompositeTypeError(CompositeError): + def __init__(self, path, expected_type, actual_type): + super(CompositeTypeError, self).__init__( + path, + "Error compositing dictionary key '{}', expected source type '{}' " + "but received type '{}'" + .format(path, expected_type.__name__, actual_type.__name__)) + self.expected_type = expected_type + self.actual_type = actual_type + + +# Loads a dictionary from some YAML +# +# Args: +# filename (str): The YAML file to load +# shortname (str): The filename in shorthand for error reporting (or None) +# copy_tree (bool): Whether to make a copy, preserving the original toplevels +# for later serialization +# yaml_cache (YamlCache): A yaml cache to consult rather than parsing +# +# Returns (dict): A loaded copy of the YAML file with provenance information +# +# 
Raises: LoadError +# +def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None): + if not shortname: + shortname = filename + + file = ProvenanceFile(filename, shortname, project) + + try: + data = None + with open(filename) as f: + contents = f.read() + if yaml_cache: + data, key = yaml_cache.get(project, filename, contents, copy_tree) + + if not data: + data = load_data(contents, file, copy_tree=copy_tree) + if yaml_cache: + yaml_cache.put_from_key(project, filename, key, data) + + return data + except FileNotFoundError as e: + raise LoadError(LoadErrorReason.MISSING_FILE, + "Could not find file at {}".format(filename)) from e + except IsADirectoryError as e: + raise LoadError(LoadErrorReason.LOADING_DIRECTORY, + "{} is a directory. bst command expects a .bst file." + .format(filename)) from e + + +# Like load(), but doesnt require the data to be in a file +# +def load_data(data, file=None, copy_tree=False): + + try: + contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True) + except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e: + raise LoadError(LoadErrorReason.INVALID_YAML, + "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e + + if not isinstance(contents, dict): + # Special case allowance for None, when the loaded file has only comments in it. + if contents is None: + contents = {} + else: + raise LoadError(LoadErrorReason.INVALID_YAML, + "YAML file has content of type '{}' instead of expected type 'dict': {}" + .format(type(contents).__name__, file.name)) + + return node_decorated_copy(file, contents, copy_tree=copy_tree) + + +# Dumps a previously loaded YAML node to a file +# +# Args: +# node (dict): A node previously loaded with _yaml.load() above +# filename (str): The YAML file to load +# +def dump(node, filename=None): + with ExitStack() as stack: + if filename: + from . 
import utils + f = stack.enter_context(utils.save_file_atomic(filename, 'w')) + else: + f = sys.stdout + yaml.round_trip_dump(node, f) + + +# node_decorated_copy() +# +# Create a copy of a loaded dict tree decorated with Provenance +# information, used directly after loading yaml +# +# Args: +# filename (str): The filename +# toplevel (node): The toplevel dictionary node +# copy_tree (bool): Whether to load a copy and preserve the original +# +# Returns: A copy of the toplevel decorated with Provinance +# +def node_decorated_copy(filename, toplevel, copy_tree=False): + if copy_tree: + result = deepcopy(toplevel) + else: + result = toplevel + + node_decorate_dict(filename, result, toplevel, toplevel) + + return result + + +def node_decorate_dict(filename, target, source, toplevel): + provenance = DictProvenance(filename, source, toplevel) + target[PROVENANCE_KEY] = provenance + + for key, value in node_items(source): + member = MemberProvenance(filename, source, key, toplevel) + provenance.members[key] = member + + target_value = target.get(key) + if isinstance(value, collections.abc.Mapping): + node_decorate_dict(filename, target_value, value, toplevel) + elif isinstance(value, list): + member.elements = node_decorate_list(filename, target_value, value, toplevel) + + +def node_decorate_list(filename, target, source, toplevel): + + elements = [] + + for item in source: + idx = source.index(item) + target_item = target[idx] + element = ElementProvenance(filename, source, idx, toplevel) + + if isinstance(item, collections.abc.Mapping): + node_decorate_dict(filename, target_item, item, toplevel) + elif isinstance(item, list): + element.elements = node_decorate_list(filename, target_item, item, toplevel) + + elements.append(element) + + return elements + + +# node_get_provenance() +# +# Gets the provenance for a node +# +# Args: +# node (dict): a dictionary +# key (str): key in the dictionary +# indices (list of indexes): Index path, in the case of list values +# +# 
Returns: The Provenance of the dict, member or list element +# +def node_get_provenance(node, key=None, indices=None): + + provenance = node.get(PROVENANCE_KEY) + if provenance and key: + provenance = provenance.members.get(key) + if provenance and indices is not None: + for index in indices: + provenance = provenance.elements[index] + + return provenance + + +# A sentinel to be used as a default argument for functions that need +# to distinguish between a kwarg set to None and an unset kwarg. +_sentinel = object() + + +# node_get() +# +# Fetches a value from a dictionary node and checks it for +# an expected value. Use default_value when parsing a value +# which is only optionally supplied. +# +# Args: +# node (dict): The dictionary node +# expected_type (type): The expected type for the value being searched +# key (str): The key to get a value for in node +# indices (list of ints): Optionally decend into lists of lists +# default_value: Optionally return this value if the key is not found +# allow_none: (bool): Allow None to be a valid value +# +# Returns: +# The value if found in node, otherwise default_value is returned +# +# Raises: +# LoadError, when the value found is not of the expected type +# +# Note: +# Returned strings are stripped of leading and trailing whitespace +# +def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, allow_none=False): + value = node.get(key, default_value) + if value is _sentinel: + provenance = node_get_provenance(node) + raise LoadError(LoadErrorReason.INVALID_DATA, + "{}: Dictionary did not contain expected key '{}'".format(provenance, key)) + + path = key + if indices is not None: + # Implied type check of the element itself + value = node_get(node, list, key) + for index in indices: + value = value[index] + path += '[{:d}]'.format(index) + + # Optionally allow None as a valid value for any type + if value is None and (allow_none or default_value is None): + return None + + if not isinstance(value, 
expected_type): + # Attempt basic conversions if possible, typically we want to + # be able to specify numeric values and convert them to strings, + # but we dont want to try converting dicts/lists + try: + if (expected_type == bool and isinstance(value, str)): + # Dont coerce booleans to string, this makes "False" strings evaluate to True + if value in ('True', 'true'): + value = True + elif value in ('False', 'false'): + value = False + else: + raise ValueError() + elif not (expected_type == list or + expected_type == dict or + isinstance(value, (list, dict))): + value = expected_type(value) + else: + raise ValueError() + except (ValueError, TypeError): + provenance = node_get_provenance(node, key=key, indices=indices) + raise LoadError(LoadErrorReason.INVALID_DATA, + "{}: Value of '{}' is not of the expected type '{}'" + .format(provenance, path, expected_type.__name__)) + + # Trim it at the bud, let all loaded strings from yaml be stripped of whitespace + if isinstance(value, str): + value = value.strip() + + return value + + +# node_items() +# +# A convenience generator for iterating over loaded key/value +# tuples in a dictionary loaded from project YAML. 
+# +# Args: +# node (dict): The dictionary node +# +# Yields: +# (str): The key name +# (anything): The value for the key +# +def node_items(node): + for key, value in node.items(): + if key == PROVENANCE_KEY: + continue + yield (key, value) + + +# Gives a node a dummy provenance, in case of compositing dictionaries +# where the target is an empty {} +def ensure_provenance(node): + provenance = node.get(PROVENANCE_KEY) + if not provenance: + provenance = DictProvenance(ProvenanceFile('', '', None), node, node) + node[PROVENANCE_KEY] = provenance + + return provenance + + +# is_ruamel_str(): +# +# Args: +# value: A value loaded from ruamel +# +# This returns if the value is "stringish", since ruamel +# has some complex types to represent strings, this is needed +# to avoid compositing exceptions in order to allow various +# string types to be interchangable and acceptable +# +def is_ruamel_str(value): + + if isinstance(value, str): + return True + elif isinstance(value, yaml.scalarstring.ScalarString): + return True + + return False + + +# is_composite_list +# +# Checks if the given node is a Mapping with array composition +# directives. 
+# +# Args: +# node (value): Any node +# +# Returns: +# (bool): True if node was a Mapping containing only +# list composition directives +# +# Raises: +# (LoadError): If node was a mapping and contained a mix of +# list composition directives and other keys +# +def is_composite_list(node): + + if isinstance(node, collections.abc.Mapping): + has_directives = False + has_keys = False + + for key, _ in node_items(node): + if key in ['(>)', '(<)', '(=)']: # pylint: disable=simplifiable-if-statement + has_directives = True + else: + has_keys = True + + if has_keys and has_directives: + provenance = node_get_provenance(node) + raise LoadError(LoadErrorReason.INVALID_DATA, + "{}: Dictionary contains array composition directives and arbitrary keys" + .format(provenance)) + return has_directives + + return False + + +# composite_list_prepend +# +# Internal helper for list composition +# +# Args: +# target_node (dict): A simple dictionary +# target_key (dict): The key indicating a literal array to prepend to +# source_node (dict): Another simple dictionary +# source_key (str): The key indicating an array to prepend to the target +# +# Returns: +# (bool): True if a source list was found and compositing occurred +# +def composite_list_prepend(target_node, target_key, source_node, source_key): + + source_list = node_get(source_node, list, source_key, default_value=[]) + if not source_list: + return False + + target_provenance = node_get_provenance(target_node) + source_provenance = node_get_provenance(source_node) + + if target_node.get(target_key) is None: + target_node[target_key] = [] + + source_list = list_copy(source_list) + target_list = target_node[target_key] + + for element in reversed(source_list): + target_list.insert(0, element) + + if not target_provenance.members.get(target_key): + target_provenance.members[target_key] = source_provenance.members[source_key].clone() + else: + for p in reversed(source_provenance.members[source_key].elements): + 
target_provenance.members[target_key].elements.insert(0, p.clone()) + + return True + + +# composite_list_append +# +# Internal helper for list composition +# +# Args: +# target_node (dict): A simple dictionary +# target_key (dict): The key indicating a literal array to append to +# source_node (dict): Another simple dictionary +# source_key (str): The key indicating an array to append to the target +# +# Returns: +# (bool): True if a source list was found and compositing occurred +# +def composite_list_append(target_node, target_key, source_node, source_key): + + source_list = node_get(source_node, list, source_key, default_value=[]) + if not source_list: + return False + + target_provenance = node_get_provenance(target_node) + source_provenance = node_get_provenance(source_node) + + if target_node.get(target_key) is None: + target_node[target_key] = [] + + source_list = list_copy(source_list) + target_list = target_node[target_key] + + target_list.extend(source_list) + + if not target_provenance.members.get(target_key): + target_provenance.members[target_key] = source_provenance.members[source_key].clone() + else: + target_provenance.members[target_key].elements.extend([ + p.clone() for p in source_provenance.members[source_key].elements + ]) + + return True + + +# composite_list_overwrite +# +# Internal helper for list composition +# +# Args: +# target_node (dict): A simple dictionary +# target_key (dict): The key indicating a literal array to overwrite +# source_node (dict): Another simple dictionary +# source_key (str): The key indicating an array to overwrite the target with +# +# Returns: +# (bool): True if a source list was found and compositing occurred +# +def composite_list_overwrite(target_node, target_key, source_node, source_key): + + # We need to handle the legitimate case of overwriting a list with an empty + # list, hence the slightly odd default_value of [None] rather than []. 
+ source_list = node_get(source_node, list, source_key, default_value=[None]) + if source_list == [None]: + return False + + target_provenance = node_get_provenance(target_node) + source_provenance = node_get_provenance(source_node) + + target_node[target_key] = list_copy(source_list) + target_provenance.members[target_key] = source_provenance.members[source_key].clone() + + return True + + +# composite_list(): +# +# Composite the source value onto the target value, if either +# sides are lists, or dictionaries containing list compositing directives +# +# Args: +# target_node (dict): A simple dictionary +# source_node (dict): Another simple dictionary +# key (str): The key to compose on +# +# Returns: +# (bool): True if both sides were logical lists +# +# Raises: +# (LoadError): If one side was a logical list and the other was not +# +def composite_list(target_node, source_node, key): + target_value = target_node.get(key) + source_value = source_node[key] + + target_key_provenance = node_get_provenance(target_node, key) + source_key_provenance = node_get_provenance(source_node, key) + + # Whenever a literal list is encountered in the source, it + # overwrites the target values and provenance completely. + # + if isinstance(source_value, list): + + source_provenance = node_get_provenance(source_node) + target_provenance = node_get_provenance(target_node) + + # Assert target type + if not (target_value is None or + isinstance(target_value, list) or + is_composite_list(target_value)): + raise LoadError(LoadErrorReason.INVALID_DATA, + "{}: List cannot overwrite value at: {}" + .format(source_key_provenance, target_key_provenance)) + + composite_list_overwrite(target_node, key, source_node, key) + return True + + # When a composite list is encountered in the source, then + # multiple outcomes can occur... 
+ # + elif is_composite_list(source_value): + + # If there is nothing there, then the composite list + # is copied in it's entirety as is, and preserved + # for later composition + # + if target_value is None: + source_provenance = node_get_provenance(source_node) + target_provenance = node_get_provenance(target_node) + + target_node[key] = node_copy(source_value) + target_provenance.members[key] = source_provenance.members[key].clone() + + # If the target is a literal list, then composition + # occurs directly onto that target, leaving the target + # as a literal list to overwrite anything in later composition + # + elif isinstance(target_value, list): + composite_list_overwrite(target_node, key, source_value, '(=)') + composite_list_prepend(target_node, key, source_value, '(<)') + composite_list_append(target_node, key, source_value, '(>)') + + # If the target is a composite list, then composition + # occurs in the target composite list, and the composite + # target list is preserved in dictionary form for further + # composition. 
+ # + elif is_composite_list(target_value): + + if composite_list_overwrite(target_value, '(=)', source_value, '(=)'): + + # When overwriting a target with composition directives, remove any + # existing prepend/append directives in the target before adding our own + target_provenance = node_get_provenance(target_value) + + for directive in ['(<)', '(>)']: + try: + del target_value[directive] + del target_provenance.members[directive] + except KeyError: + # Ignore errors from deletion of non-existing keys + pass + + # Prepend to the target prepend array, and append to the append array + composite_list_prepend(target_value, '(<)', source_value, '(<)') + composite_list_append(target_value, '(>)', source_value, '(>)') + + else: + raise LoadError(LoadErrorReason.INVALID_DATA, + "{}: List cannot overwrite value at: {}" + .format(source_key_provenance, target_key_provenance)) + + # We handled list composition in some way + return True + + # Source value was not a logical list + return False + + +# composite_dict(): +# +# Composites values in target with values from source +# +# Args: +# target (dict): A simple dictionary +# source (dict): Another simple dictionary +# +# Raises: CompositeError +# +# Unlike the dictionary update() method, nested values in source +# will not obsolete entire subdictionaries in target, instead both +# dictionaries will be recursed and a composition of both will result +# +# This is useful for overriding configuration files and element +# configurations. +# +def composite_dict(target, source, path=None): + target_provenance = ensure_provenance(target) + source_provenance = ensure_provenance(source) + + for key, source_value in node_items(source): + + # Track the full path of keys, only for raising CompositeError + if path: + thispath = path + '.' 
+ key + else: + thispath = key + + # Handle list composition separately + if composite_list(target, source, key): + continue + + target_value = target.get(key) + + if isinstance(source_value, collections.abc.Mapping): + + # Handle creating new dicts on target side + if target_value is None: + target_value = {} + target[key] = target_value + + # Give the new dict provenance + value_provenance = source_value.get(PROVENANCE_KEY) + if value_provenance: + target_value[PROVENANCE_KEY] = value_provenance.clone() + + # Add a new provenance member element to the containing dict + target_provenance.members[key] = source_provenance.members[key] + + if not isinstance(target_value, collections.abc.Mapping): + raise CompositeTypeError(thispath, type(target_value), type(source_value)) + + # Recurse into matching dictionary + composite_dict(target_value, source_value, path=thispath) + + else: + + if target_value is not None: + + # Exception here: depending on how strings were declared ruamel may + # use a different type, but for our purposes, any stringish type will do. 
+ if not (is_ruamel_str(source_value) and is_ruamel_str(target_value)) \ + and not isinstance(source_value, type(target_value)): + raise CompositeTypeError(thispath, type(target_value), type(source_value)) + + # Overwrite simple values, lists and mappings have already been handled + target_provenance.members[key] = source_provenance.members[key].clone() + target[key] = source_value + + +# Like composite_dict(), but raises an all purpose LoadError for convenience +# +def composite(target, source): + assert hasattr(source, 'get') + + source_provenance = node_get_provenance(source) + try: + composite_dict(target, source) + except CompositeTypeError as e: + error_prefix = "" + if source_provenance: + error_prefix = "{}: ".format(source_provenance) + raise LoadError(LoadErrorReason.ILLEGAL_COMPOSITE, + "{}Expected '{}' type for configuration '{}', instead received '{}'" + .format(error_prefix, + e.expected_type.__name__, + e.path, + e.actual_type.__name__)) from e + + +# Like composite(target, source), but where target overrides source instead. +# +def composite_and_move(target, source): + composite(source, target) + + to_delete = [key for key, _ in node_items(target) if key not in source] + for key, value in source.items(): + target[key] = value + for key in to_delete: + del target[key] + + +# SanitizedDict is an OrderedDict that is dumped as unordered mapping. +# This provides deterministic output for unordered mappings. +# +class SanitizedDict(collections.OrderedDict): + pass + + +RoundTripRepresenter.add_representer(SanitizedDict, + SafeRepresenter.represent_dict) + + +# Types we can short-circuit in node_sanitize for speed. +__SANITIZE_SHORT_CIRCUIT_TYPES = (int, float, str, bool, tuple) + + +# node_sanitize() +# +# Returnes an alphabetically ordered recursive copy +# of the source node with internal provenance information stripped. +# +# Only dicts are ordered, list elements are left in order. +# +def node_sanitize(node): + # Short-circuit None which occurs ca. 
twice per element + if node is None: + return node + + node_type = type(node) + # Next short-circuit integers, floats, strings, booleans, and tuples + if node_type in __SANITIZE_SHORT_CIRCUIT_TYPES: + return node + # Now short-circuit lists. Note this is only for the raw list + # type, CommentedSeq and others get caught later. + elif node_type is list: + return [node_sanitize(elt) for elt in node] + + # Finally dict, and other Mappings need special handling + if node_type is dict or isinstance(node, collections.abc.Mapping): + result = SanitizedDict() + + key_list = [key for key, _ in node_items(node)] + for key in sorted(key_list): + result[key] = node_sanitize(node[key]) + + return result + # Catch the case of CommentedSeq and friends. This is more rare and so + # we keep complexity down by still using isinstance here. + elif isinstance(node, list): + return [node_sanitize(elt) for elt in node] + + # Everything else (such as commented scalars) just gets returned as-is. + return node + + +# node_validate() +# +# Validate the node so as to ensure the user has not specified +# any keys which are unrecognized by buildstream (usually this +# means a typo which would otherwise not trigger an error). 
#
# Args:
#    node (dict): A dictionary loaded from YAML
#    valid_keys (list): A list of valid keys for the specified node
#
# Raises:
#    LoadError: In the case that the specified node contained
#               one or more invalid keys
#
def node_validate(node, valid_keys):

    # Probably the fastest way to do this: https://stackoverflow.com/a/23062482
    valid_keys = set(valid_keys)
    valid_keys.add(PROVENANCE_KEY)

    # Use a sentinel rather than relying on truthiness: a falsy invalid
    # key (e.g. '' or 0 loaded from YAML) would otherwise slip through
    # validation silently.
    not_found = object()
    invalid = next((key for key in node if key not in valid_keys), not_found)

    if invalid is not not_found:
        provenance = node_get_provenance(node, key=invalid)
        raise LoadError(LoadErrorReason.INVALID_DATA,
                        "{}: Unexpected key: {}".format(provenance, invalid))


# Node copying
#
# Unfortunately we copy nodes a *lot* and `isinstance()` is super-slow when
# things from collections.abc get involved. The result is the following
# intricate but substantially faster group of tuples and the use of `in`.
#
# If any of the {node,list}_copy routines raise a ValueError
# then it's likely additional types need adding to these tuples.


# These types just have their value copied
__QUICK_TYPES = (str, bool,
                 yaml.scalarstring.PreservedScalarString,
                 yaml.scalarstring.SingleQuotedScalarString,
                 yaml.scalarstring.DoubleQuotedScalarString)

# These types have to be iterated like a dictionary
__DICT_TYPES = (dict, yaml.comments.CommentedMap)

# These types have to be iterated like a list
__LIST_TYPES = (list, yaml.comments.CommentedSeq)

# These are the provenance types, which have to be cloned rather than any other
# copying tactic.
__PROVENANCE_TYPES = (Provenance, DictProvenance, MemberProvenance, ElementProvenance)

# These are the directives used to compose lists, we need this because it's
# slightly faster during the node_final_assertions checks
__NODE_ASSERT_COMPOSITION_DIRECTIVES = ('(>)', '(<)', '(=)')


# node_copy()
#
# Make a deep copy of a YAML mapping node, cloning any provenance
# members along the way and ensuring the copy carries provenance.
#
# Args:
#    source (dict): The mapping to copy
#
# Returns:
#    (dict): A deep copy of source
#
def node_copy(source):
    result = {}
    for key, member in source.items():
        member_type = type(member)
        if member_type in __DICT_TYPES:
            result[key] = node_copy(member)
        elif member_type in __LIST_TYPES:
            result[key] = list_copy(member)
        elif member_type in __PROVENANCE_TYPES:
            result[key] = member.clone()
        elif member_type in __QUICK_TYPES:
            result[key] = member
        else:
            raise ValueError("Unable to be quick about node_copy of {}".format(member_type))

    ensure_provenance(result)

    return result


# list_copy()
#
# Make a deep copy of a YAML sequence node, cloning any provenance
# members along the way.
#
# Args:
#    source (list): The sequence to copy
#
# Returns:
#    (list): A deep copy of source
#
def list_copy(source):
    result = []
    for element in source:
        element_type = type(element)
        if element_type in __DICT_TYPES:
            result.append(node_copy(element))
        elif element_type in __LIST_TYPES:
            result.append(list_copy(element))
        elif element_type in __PROVENANCE_TYPES:
            result.append(element.clone())
        elif element_type in __QUICK_TYPES:
            result.append(element)
        else:
            raise ValueError("Unable to be quick about list_copy of {}".format(element_type))

    return result


# node_final_assertions()
#
# This must be called on a fully loaded and composited node,
# after all composition has completed.
#
# Args:
#    node (Mapping): The final composited node
#
# Raises:
#    (LoadError): If any assertions fail
#
def node_final_assertions(node):
    for key, value in node_items(node):

        # Assert that list composition directives dont remain, this
        # indicates that the user intended to override a list which
        # never existed in the underlying data
        #
        if key in __NODE_ASSERT_COMPOSITION_DIRECTIVES:
            provenance = node_get_provenance(node, key)
            raise LoadError(LoadErrorReason.TRAILING_LIST_DIRECTIVE,
                            "{}: Attempt to override non-existing list".format(provenance))

        value_type = type(value)
        if value_type in __DICT_TYPES:
            node_final_assertions(value)
        elif value_type in __LIST_TYPES:
            list_final_assertions(value)


# list_final_assertions()
#
# Companion to node_final_assertions(), recursing into sequence values.
#
def list_final_assertions(values):
    for value in values:
        value_type = type(value)
        if value_type in __DICT_TYPES:
            node_final_assertions(value)
        elif value_type in __LIST_TYPES:
            list_final_assertions(value)


# assert_symbol_name()
#
# A helper function to check if a loaded string is a valid symbol
# name and to raise a consistent LoadError if not. For strings which
# are required to be symbols.
#
# Args:
#    provenance (Provenance): The provenance of the loaded symbol, or None
#    symbol_name (str): The loaded symbol name
#    purpose (str): The purpose of the string, for an error message
#    allow_dashes (bool): Whether dashes are allowed for this symbol
#
# Raises:
#    LoadError: If the symbol_name is invalid
#
# Note that dashes are generally preferred for variable names and
# usage in YAML, but things such as option names which will be
# evaluated with jinja2 cannot use dashes.
#
def assert_symbol_name(provenance, symbol_name, purpose, *, allow_dashes=True):
    # Use a set for O(1) membership tests; the original concatenated
    # string forced a linear scan of ~63 characters per symbol character.
    valid_chars = set(string.digits + string.ascii_letters + '_')
    if allow_dashes:
        valid_chars.add('-')

    valid = True
    if not symbol_name:
        # Empty names are never valid
        valid = False
    elif any(x not in valid_chars for x in symbol_name):
        valid = False
    elif symbol_name[0] in string.digits:
        # Leading digits are reserved (symbols may be evaluated by jinja2)
        valid = False

    if not valid:
        detail = "Symbol names must contain only alphanumeric characters, " + \
                 "may not start with a digit, and may contain underscores"
        if allow_dashes:
            detail += " or dashes"

        message = "Invalid symbol name for {}: '{}'".format(purpose, symbol_name)
        if provenance is not None:
            message = "{}: {}".format(provenance, message)

        raise LoadError(LoadErrorReason.INVALID_SYMBOL_NAME,
                        message, detail=detail)

# NOTE(review): the remainder of this span is cgit diff metadata for
# buildstream/element.py, preserved below as comments so this unit of the
# mangled dump remains syntactically valid python:
# diff --git a/buildstream/element.py b/buildstream/element.py
# index 07745dbf0..0d165370c 100644
# --- a/buildstream/element.py
# +++ b/buildstream/element.py
# @@ -86,6 +86,7 @@ import shutil
#  import string
#
#  from . import _yaml
# +from .
import _yaml_roundtrip from ._variables import Variables from ._versions import BST_CORE_ARTIFACT_VERSION from ._exceptions import BstError, LoadError, LoadErrorReason, ImplError, \ @@ -1750,35 +1751,35 @@ class Element(Plugin): shutil.copyfile(log_filename, self._build_log_path) # Store public data - _yaml.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml')) + _yaml_roundtrip.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml')) # Store result build_result_dict = {"success": self.__build_result[0], "description": self.__build_result[1]} if self.__build_result[2] is not None: build_result_dict["detail"] = self.__build_result[2] - _yaml.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml')) + _yaml_roundtrip.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml')) # ensure we have cache keys self._assemble_done() # Store keys.yaml - _yaml.dump(_yaml.node_sanitize({ + _yaml_roundtrip.dump(_yaml.node_sanitize({ 'strong': self._get_cache_key(), 'weak': self._get_cache_key(_KeyStrength.WEAK), }), os.path.join(metadir, 'keys.yaml')) # Store dependencies.yaml - _yaml.dump(_yaml.node_sanitize({ + _yaml_roundtrip.dump(_yaml.node_sanitize({ e.name: e._get_cache_key() for e in self.dependencies(Scope.BUILD) }), os.path.join(metadir, 'dependencies.yaml')) # Store workspaced.yaml - _yaml.dump(_yaml.node_sanitize({ + _yaml_roundtrip.dump(_yaml.node_sanitize({ 'workspaced': bool(self._get_workspace()) }), os.path.join(metadir, 'workspaced.yaml')) # Store workspaced-dependencies.yaml - _yaml.dump(_yaml.node_sanitize({ + _yaml_roundtrip.dump(_yaml.node_sanitize({ 'workspaced-dependencies': [ e.name for e in self.dependencies(Scope.BUILD) if e._get_workspace() diff --git a/buildstream/plugintestutils/runcli.py b/buildstream/plugintestutils/runcli.py index 5dbecdd60..558f8f964 100644 --- a/buildstream/plugintestutils/runcli.py +++ b/buildstream/plugintestutils/runcli.py @@ -52,6 +52,8 @@ from 
_pytest.capture import MultiCapture, FDCapture, FDCaptureBinary # Import the main cli entrypoint from buildstream._frontend import cli as bst_cli from buildstream import _yaml +from buildstream import _yaml_roundtrip + # Special private exception accessor, for test case purposes from buildstream._exceptions import BstError, get_last_exception, get_last_task_error @@ -542,7 +544,7 @@ class CliIntegration(Cli): _yaml.composite_dict(base_config, project_config) base_config = _yaml.node_sanitize(base_config) - _yaml.dump(base_config, project_filename) + _yaml_roundtrip.dump(base_config, project_filename) else: @@ -645,6 +647,6 @@ def configured(directory, config=None): # Dump it and yield the filename for test scripts to feed it # to buildstream as an artument filename = os.path.join(directory, "buildstream.conf") - _yaml.dump(config, filename) + _yaml_roundtrip.dump(config, filename) yield filename diff --git a/buildstream/source.py b/buildstream/source.py index b0ba1c744..31156854f 100644 --- a/buildstream/source.py +++ b/buildstream/source.py @@ -166,7 +166,7 @@ from collections.abc import Mapping from contextlib import contextmanager from . import Plugin, Consistency -from . import _yaml, utils +from . import _yaml, utils, _yaml_roundtrip from ._exceptions import BstError, ImplError, ErrorDomain from ._loader.metasource import MetaSource from ._projectrefs import ProjectRefStorage @@ -889,7 +889,8 @@ class Source(Plugin): # Save the ref in the originating file # try: - _yaml.dump(provenance.toplevel, provenance.filename.name) + # This is our roundtrip dump from the track + _yaml_roundtrip.dump(provenance.toplevel, provenance.filename.name) except OSError as e: raise SourceError("{}: Error saving source reference to '{}': {}" .format(self, provenance.filename.name, e), |