summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Ennis <james.ennis@codethink.co.uk>2019-03-07 16:34:10 +0000
committerJames Ennis <james.ennis@codethink.co.uk>2019-03-07 16:34:10 +0000
commit8ee336e948eb170fd9714bd060fc4f1e4628eeea (patch)
tree2884e9a6724a9e071a993607eca33f1bbca19199
parent547d212c1d3bae5ef9d0e08b71091fe971a447cd (diff)
downloadbuildstream-8ee336e948eb170fd9714bd060fc4f1e4628eeea.tar.gz
Add _yaml_roundtrip module to be used when we track or roundtrip dump
Currently, this module is just a copy of _yaml.py. We should strip this of everything irrelevant.
-rw-r--r--buildstream/_projectrefs.py9
-rw-r--r--buildstream/_stream.py5
-rw-r--r--buildstream/_workspaces.py17
-rw-r--r--buildstream/_yaml_roundtrip.py1055
-rw-r--r--buildstream/element.py13
-rw-r--r--buildstream/plugintestutils/runcli.py6
-rw-r--r--buildstream/source.py5
7 files changed, 1091 insertions, 19 deletions
diff --git a/buildstream/_projectrefs.py b/buildstream/_projectrefs.py
index 4009d7449..6edfa21bb 100644
--- a/buildstream/_projectrefs.py
+++ b/buildstream/_projectrefs.py
@@ -19,6 +19,7 @@
import os
from . import _yaml
+from . import _yaml_roundtrip
from ._exceptions import LoadError, LoadErrorReason
@@ -64,7 +65,7 @@ class ProjectRefs():
try:
self._toplevel_node = _yaml.load(self._fullpath, shortname=self._base_name, copy_tree=True)
- provenance = _yaml.node_get_provenance(self._toplevel_node)
+ provenance = _yaml_roundtrip.node_get_provenance(self._toplevel_node)
self._toplevel_save = provenance.toplevel
# Process any project options immediately
@@ -72,7 +73,7 @@ class ProjectRefs():
# Run any final assertions on the project.refs, just in case there
# are list composition directives or anything left unprocessed.
- _yaml.node_final_assertions(self._toplevel_node)
+ _yaml_roundtrip.node_final_assertions(self._toplevel_node)
except LoadError as e:
if e.reason != LoadErrorReason.MISSING_FILE:
@@ -83,7 +84,7 @@ class ProjectRefs():
self._toplevel_node = {}
self._toplevel_save = self._toplevel_node
- _yaml.node_validate(self._toplevel_node, ['projects'])
+ _yaml_roundtrip.node_validate(self._toplevel_node, ['projects'])
# Ensure we create our toplevel entry point on the fly here
for node in [self._toplevel_node, self._toplevel_save]:
@@ -95,7 +96,7 @@ class ProjectRefs():
# Save the project.refs file with any local changes
#
def save(self):
- _yaml.dump(self._toplevel_save, self._fullpath)
+ _yaml_roundtrip.dump(self._toplevel_save, self._fullpath)
# lookup_ref()
#
diff --git a/buildstream/_stream.py b/buildstream/_stream.py
index b0fce3817..72fe2cd8c 100644
--- a/buildstream/_stream.py
+++ b/buildstream/_stream.py
@@ -35,7 +35,7 @@ from ._message import Message, MessageType
from ._scheduler import Scheduler, SchedStatus, TrackQueue, FetchQueue, BuildQueue, PullQueue, PushQueue
from ._pipeline import Pipeline, PipelineSelection
from ._profile import Topics, profile_start, profile_end
-from . import utils, _yaml, _site
+from . import utils, _yaml, _site, _yaml_roundtrip
from . import Scope, Consistency
@@ -812,7 +812,8 @@ class Stream():
}
workspaces.append(workspace_detail)
- _yaml.dump({
+ # NOTE: Not sure about this one
+ _yaml_roundtrip.dump({
'workspaces': workspaces
})
diff --git a/buildstream/_workspaces.py b/buildstream/_workspaces.py
index 24a3cc8d3..81d2f863f 100644
--- a/buildstream/_workspaces.py
+++ b/buildstream/_workspaces.py
@@ -20,6 +20,7 @@
import os
from . import utils
from . import _yaml
+from . import _yaml_roundtrip
from ._exceptions import LoadError, LoadErrorReason
@@ -114,6 +115,7 @@ class WorkspaceProject():
def load(cls, directory):
workspace_file = os.path.join(directory, WORKSPACE_PROJECT_FILE)
if os.path.exists(workspace_file):
+ # NOTE: This probably doesn't need to be loaded with provenance either
data_dict = _yaml.load(workspace_file)
return cls.from_dict(directory, data_dict)
else:
@@ -125,7 +127,9 @@ class WorkspaceProject():
#
def write(self):
os.makedirs(self._directory, exist_ok=True)
- _yaml.dump(self.to_dict(), self.get_filename())
+ # This isn't even a roundtrip, it's just dumping a dict created in
+ # to_dict() to the file
+ _yaml_roundtrip.dump(self.to_dict(), self.get_filename())
# get_filename()
#
@@ -530,8 +534,11 @@ class Workspaces():
}
}
os.makedirs(self._bst_directory, exist_ok=True)
- _yaml.dump(_yaml.node_sanitize(config),
- self._get_filename())
+ # NOTE: We're just dumping to the workspaces.yml file here, potentially we could
+ # just use ruamel.yaml.dump() here? I don't think this file is required for cache,
+ # key calc
+ _yaml_roundtrip.dump(_yaml.node_sanitize(config),
+ self._get_filename())
# _load_config()
#
@@ -545,6 +552,8 @@ class Workspaces():
def _load_config(self):
workspace_file = self._get_filename()
try:
+ # NOTE: Again, we're not expecting the user to be tweaking this file, and it doesn't
+ # make a difference to cache keys, surely we just load without provenance?
node = _yaml.load(workspace_file)
except LoadError as e:
if e.reason == LoadErrorReason.MISSING_FILE:
@@ -570,6 +579,7 @@ class Workspaces():
# Raises: LoadError if there was a problem with the workspace config
#
def _parse_workspace_config(self, workspaces):
+ # NOTE: As we're providing this with a default value, it won't fail.
version = _yaml.node_get(workspaces, int, "format-version", default_value=0)
if version == 0:
@@ -624,6 +634,7 @@ class Workspaces():
def _load_workspace(self, node):
dictionary = {
'prepared': _yaml.node_get(node, bool, 'prepared', default_value=False),
+        # NOTE: We might need provenance here.
'path': _yaml.node_get(node, str, 'path'),
'last_successful': _yaml.node_get(node, str, 'last_successful', default_value=None),
'running_files': _yaml.node_get(node, dict, 'running_files', default_value=None),
diff --git a/buildstream/_yaml_roundtrip.py b/buildstream/_yaml_roundtrip.py
new file mode 100644
index 000000000..482e23bb3
--- /dev/null
+++ b/buildstream/_yaml_roundtrip.py
@@ -0,0 +1,1055 @@
+#
+# Copyright (C) 2018 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+# Authors:
+# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
+
+import sys
+import collections
+import string
+from copy import deepcopy
+from contextlib import ExitStack
+
+from ruamel import yaml
+from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter
+from ruamel.yaml.constructor import RoundTripConstructor
+from ._exceptions import LoadError, LoadErrorReason
+
+# This overrides the ruamel constructor to treat everything as a string
+RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:int', RoundTripConstructor.construct_yaml_str)
+RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:float', RoundTripConstructor.construct_yaml_str)
+
+# We store information in the loaded yaml on a DictProvenance
+# stored in all dictionaries under this key
+PROVENANCE_KEY = '__bst_provenance_info'
+
+
+# Provides information about file for provenance
+#
+# Args:
+# name (str): Full path to the file
+# shortname (str): Relative path to the file
+# project (Project): Project where the shortname is relative from
+class ProvenanceFile():
+ def __init__(self, name, shortname, project):
+ self.name = name
+ self.shortname = shortname
+ self.project = project
+
+
+# Provenance tracks the origin of a given node in the parsed dictionary.
+#
+# Args:
+# node (dict, list, value): A binding to the originally parsed value
+# filename (string): The filename the node was loaded from
+# toplevel (dict): The toplevel of the loaded file, suitable for later dumps
+# line (int): The line number where node was parsed
+# col (int): The column number where node was parsed
+#
+class Provenance():
+ def __init__(self, filename, node, toplevel, line=0, col=0):
+ self.filename = filename
+ self.node = node
+ self.toplevel = toplevel
+ self.line = line
+ self.col = col
+
+ # Convert a Provenance to a string for error reporting
+ def __str__(self):
+ return "{} [line {:d} column {:d}]".format(self.filename.shortname, self.line, self.col)
+
+ # Abstract method
+ def clone(self):
+ pass # pragma: nocover
+
+
+# A Provenance for dictionaries, these are stored in the copy of the
+# loaded YAML tree and track the provenance of all members
+#
+class DictProvenance(Provenance):
+ def __init__(self, filename, node, toplevel, line=None, col=None):
+
+ if line is None or col is None:
+ # Special case for loading an empty dict
+ if hasattr(node, 'lc'):
+ line = node.lc.line + 1
+ col = node.lc.col
+ else:
+ line = 1
+ col = 0
+
+ super(DictProvenance, self).__init__(filename, node, toplevel, line=line, col=col)
+
+ self.members = {}
+
+ def clone(self):
+ provenance = DictProvenance(self.filename, self.node, self.toplevel,
+ line=self.line, col=self.col)
+
+ provenance.members = {
+ member_name: member.clone()
+ for member_name, member in self.members.items()
+ }
+ return provenance
+
+
+# A Provenance for dict members
+#
+class MemberProvenance(Provenance):
+ def __init__(self, filename, parent_dict, member_name, toplevel,
+ node=None, line=None, col=None):
+
+ if parent_dict is not None:
+ node = parent_dict[member_name]
+ line, col = parent_dict.lc.value(member_name)
+ line += 1
+
+ super(MemberProvenance, self).__init__(
+ filename, node, toplevel, line=line, col=col)
+
+ # Only used if member is a list
+ self.elements = []
+
+ def clone(self):
+ provenance = MemberProvenance(self.filename, None, None, self.toplevel,
+ node=self.node, line=self.line, col=self.col)
+ provenance.elements = [e.clone() for e in self.elements]
+ return provenance
+
+
+# A Provenance for list elements
+#
+class ElementProvenance(Provenance):
+ def __init__(self, filename, parent_list, index, toplevel,
+ node=None, line=None, col=None):
+
+ if parent_list is not None:
+ node = parent_list[index]
+ line, col = parent_list.lc.item(index)
+ line += 1
+
+ super(ElementProvenance, self).__init__(
+ filename, node, toplevel, line=line, col=col)
+
+ # Only used if element is a list
+ self.elements = []
+
+ def clone(self):
+ provenance = ElementProvenance(self.filename, None, None, self.toplevel,
+ node=self.node, line=self.line, col=self.col)
+
+ provenance.elements = [e.clone for e in self.elements]
+ return provenance
+
+
+# These exceptions are intended to be caught entirely within
+# the BuildStream framework, hence they do not reside in the
+# public exceptions.py
+class CompositeError(Exception):
+ def __init__(self, path, message):
+ super(CompositeError, self).__init__(message)
+ self.path = path
+
+
+class CompositeTypeError(CompositeError):
+ def __init__(self, path, expected_type, actual_type):
+ super(CompositeTypeError, self).__init__(
+ path,
+ "Error compositing dictionary key '{}', expected source type '{}' "
+ "but received type '{}'"
+ .format(path, expected_type.__name__, actual_type.__name__))
+ self.expected_type = expected_type
+ self.actual_type = actual_type
+
+
+# Loads a dictionary from some YAML
+#
+# Args:
+# filename (str): The YAML file to load
+# shortname (str): The filename in shorthand for error reporting (or None)
+# copy_tree (bool): Whether to make a copy, preserving the original toplevels
+# for later serialization
+# yaml_cache (YamlCache): A yaml cache to consult rather than parsing
+#
+# Returns (dict): A loaded copy of the YAML file with provenance information
+#
+# Raises: LoadError
+#
+def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=None):
+ if not shortname:
+ shortname = filename
+
+ file = ProvenanceFile(filename, shortname, project)
+
+ try:
+ data = None
+ with open(filename) as f:
+ contents = f.read()
+ if yaml_cache:
+ data, key = yaml_cache.get(project, filename, contents, copy_tree)
+
+ if not data:
+ data = load_data(contents, file, copy_tree=copy_tree)
+ if yaml_cache:
+ yaml_cache.put_from_key(project, filename, key, data)
+
+ return data
+ except FileNotFoundError as e:
+ raise LoadError(LoadErrorReason.MISSING_FILE,
+ "Could not find file at {}".format(filename)) from e
+ except IsADirectoryError as e:
+ raise LoadError(LoadErrorReason.LOADING_DIRECTORY,
+ "{} is a directory. bst command expects a .bst file."
+ .format(filename)) from e
+
+
+# Like load(), but doesn't require the data to be in a file
+#
+def load_data(data, file=None, copy_tree=False):
+
+ try:
+ contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True)
+ except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
+ raise LoadError(LoadErrorReason.INVALID_YAML,
+ "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e
+
+ if not isinstance(contents, dict):
+ # Special case allowance for None, when the loaded file has only comments in it.
+ if contents is None:
+ contents = {}
+ else:
+ raise LoadError(LoadErrorReason.INVALID_YAML,
+ "YAML file has content of type '{}' instead of expected type 'dict': {}"
+ .format(type(contents).__name__, file.name))
+
+ return node_decorated_copy(file, contents, copy_tree=copy_tree)
+
+
+# Dumps a previously loaded YAML node to a file
+#
+# Args:
+# node (dict): A node previously loaded with _yaml.load() above
+# filename (str): The YAML file to load
+#
+def dump(node, filename=None):
+ with ExitStack() as stack:
+ if filename:
+ from . import utils
+ f = stack.enter_context(utils.save_file_atomic(filename, 'w'))
+ else:
+ f = sys.stdout
+ yaml.round_trip_dump(node, f)
+
+
+# node_decorated_copy()
+#
+# Create a copy of a loaded dict tree decorated with Provenance
+# information, used directly after loading yaml
+#
+# Args:
+# filename (str): The filename
+# toplevel (node): The toplevel dictionary node
+# copy_tree (bool): Whether to load a copy and preserve the original
+#
+# Returns: A copy of the toplevel decorated with Provenance
+#
+def node_decorated_copy(filename, toplevel, copy_tree=False):
+ if copy_tree:
+ result = deepcopy(toplevel)
+ else:
+ result = toplevel
+
+ node_decorate_dict(filename, result, toplevel, toplevel)
+
+ return result
+
+
+def node_decorate_dict(filename, target, source, toplevel):
+ provenance = DictProvenance(filename, source, toplevel)
+ target[PROVENANCE_KEY] = provenance
+
+ for key, value in node_items(source):
+ member = MemberProvenance(filename, source, key, toplevel)
+ provenance.members[key] = member
+
+ target_value = target.get(key)
+ if isinstance(value, collections.abc.Mapping):
+ node_decorate_dict(filename, target_value, value, toplevel)
+ elif isinstance(value, list):
+ member.elements = node_decorate_list(filename, target_value, value, toplevel)
+
+
+def node_decorate_list(filename, target, source, toplevel):
+
+ elements = []
+
+ for item in source:
+ idx = source.index(item)
+ target_item = target[idx]
+ element = ElementProvenance(filename, source, idx, toplevel)
+
+ if isinstance(item, collections.abc.Mapping):
+ node_decorate_dict(filename, target_item, item, toplevel)
+ elif isinstance(item, list):
+ element.elements = node_decorate_list(filename, target_item, item, toplevel)
+
+ elements.append(element)
+
+ return elements
+
+
+# node_get_provenance()
+#
+# Gets the provenance for a node
+#
+# Args:
+# node (dict): a dictionary
+# key (str): key in the dictionary
+# indices (list of indexes): Index path, in the case of list values
+#
+# Returns: The Provenance of the dict, member or list element
+#
+def node_get_provenance(node, key=None, indices=None):
+
+ provenance = node.get(PROVENANCE_KEY)
+ if provenance and key:
+ provenance = provenance.members.get(key)
+ if provenance and indices is not None:
+ for index in indices:
+ provenance = provenance.elements[index]
+
+ return provenance
+
+
+# A sentinel to be used as a default argument for functions that need
+# to distinguish between a kwarg set to None and an unset kwarg.
+_sentinel = object()
+
+
+# node_get()
+#
+# Fetches a value from a dictionary node and checks it for
+# an expected value. Use default_value when parsing a value
+# which is only optionally supplied.
+#
+# Args:
+# node (dict): The dictionary node
+# expected_type (type): The expected type for the value being searched
+# key (str): The key to get a value for in node
+#    indices (list of ints): Optionally descend into lists of lists
+# default_value: Optionally return this value if the key is not found
+# allow_none: (bool): Allow None to be a valid value
+#
+# Returns:
+# The value if found in node, otherwise default_value is returned
+#
+# Raises:
+# LoadError, when the value found is not of the expected type
+#
+# Note:
+# Returned strings are stripped of leading and trailing whitespace
+#
+def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, allow_none=False):
+ value = node.get(key, default_value)
+ if value is _sentinel:
+ provenance = node_get_provenance(node)
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}: Dictionary did not contain expected key '{}'".format(provenance, key))
+
+ path = key
+ if indices is not None:
+ # Implied type check of the element itself
+ value = node_get(node, list, key)
+ for index in indices:
+ value = value[index]
+ path += '[{:d}]'.format(index)
+
+ # Optionally allow None as a valid value for any type
+ if value is None and (allow_none or default_value is None):
+ return None
+
+ if not isinstance(value, expected_type):
+ # Attempt basic conversions if possible, typically we want to
+ # be able to specify numeric values and convert them to strings,
+        # but we don't want to try converting dicts/lists
+ try:
+ if (expected_type == bool and isinstance(value, str)):
+                # Don't coerce booleans to string, this makes "False" strings evaluate to True
+ if value in ('True', 'true'):
+ value = True
+ elif value in ('False', 'false'):
+ value = False
+ else:
+ raise ValueError()
+ elif not (expected_type == list or
+ expected_type == dict or
+ isinstance(value, (list, dict))):
+ value = expected_type(value)
+ else:
+ raise ValueError()
+ except (ValueError, TypeError):
+ provenance = node_get_provenance(node, key=key, indices=indices)
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}: Value of '{}' is not of the expected type '{}'"
+ .format(provenance, path, expected_type.__name__))
+
+ # Trim it at the bud, let all loaded strings from yaml be stripped of whitespace
+ if isinstance(value, str):
+ value = value.strip()
+
+ return value
+
+
+# node_items()
+#
+# A convenience generator for iterating over loaded key/value
+# tuples in a dictionary loaded from project YAML.
+#
+# Args:
+# node (dict): The dictionary node
+#
+# Yields:
+# (str): The key name
+# (anything): The value for the key
+#
+def node_items(node):
+ for key, value in node.items():
+ if key == PROVENANCE_KEY:
+ continue
+ yield (key, value)
+
+
+# Gives a node a dummy provenance, in case of compositing dictionaries
+# where the target is an empty {}
+def ensure_provenance(node):
+ provenance = node.get(PROVENANCE_KEY)
+ if not provenance:
+ provenance = DictProvenance(ProvenanceFile('', '', None), node, node)
+ node[PROVENANCE_KEY] = provenance
+
+ return provenance
+
+
+# is_ruamel_str():
+#
+# Args:
+# value: A value loaded from ruamel
+#
+# This returns if the value is "stringish", since ruamel
+# has some complex types to represent strings, this is needed
+# to avoid compositing exceptions in order to allow various
+# string types to be interchangeable and acceptable
+#
+def is_ruamel_str(value):
+
+ if isinstance(value, str):
+ return True
+ elif isinstance(value, yaml.scalarstring.ScalarString):
+ return True
+
+ return False
+
+
+# is_composite_list
+#
+# Checks if the given node is a Mapping with array composition
+# directives.
+#
+# Args:
+# node (value): Any node
+#
+# Returns:
+# (bool): True if node was a Mapping containing only
+# list composition directives
+#
+# Raises:
+# (LoadError): If node was a mapping and contained a mix of
+# list composition directives and other keys
+#
+def is_composite_list(node):
+
+ if isinstance(node, collections.abc.Mapping):
+ has_directives = False
+ has_keys = False
+
+ for key, _ in node_items(node):
+ if key in ['(>)', '(<)', '(=)']: # pylint: disable=simplifiable-if-statement
+ has_directives = True
+ else:
+ has_keys = True
+
+ if has_keys and has_directives:
+ provenance = node_get_provenance(node)
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}: Dictionary contains array composition directives and arbitrary keys"
+ .format(provenance))
+ return has_directives
+
+ return False
+
+
+# composite_list_prepend
+#
+# Internal helper for list composition
+#
+# Args:
+# target_node (dict): A simple dictionary
+# target_key (dict): The key indicating a literal array to prepend to
+# source_node (dict): Another simple dictionary
+# source_key (str): The key indicating an array to prepend to the target
+#
+# Returns:
+# (bool): True if a source list was found and compositing occurred
+#
+def composite_list_prepend(target_node, target_key, source_node, source_key):
+
+ source_list = node_get(source_node, list, source_key, default_value=[])
+ if not source_list:
+ return False
+
+ target_provenance = node_get_provenance(target_node)
+ source_provenance = node_get_provenance(source_node)
+
+ if target_node.get(target_key) is None:
+ target_node[target_key] = []
+
+ source_list = list_copy(source_list)
+ target_list = target_node[target_key]
+
+ for element in reversed(source_list):
+ target_list.insert(0, element)
+
+ if not target_provenance.members.get(target_key):
+ target_provenance.members[target_key] = source_provenance.members[source_key].clone()
+ else:
+ for p in reversed(source_provenance.members[source_key].elements):
+ target_provenance.members[target_key].elements.insert(0, p.clone())
+
+ return True
+
+
+# composite_list_append
+#
+# Internal helper for list composition
+#
+# Args:
+# target_node (dict): A simple dictionary
+# target_key (dict): The key indicating a literal array to append to
+# source_node (dict): Another simple dictionary
+# source_key (str): The key indicating an array to append to the target
+#
+# Returns:
+# (bool): True if a source list was found and compositing occurred
+#
+def composite_list_append(target_node, target_key, source_node, source_key):
+
+ source_list = node_get(source_node, list, source_key, default_value=[])
+ if not source_list:
+ return False
+
+ target_provenance = node_get_provenance(target_node)
+ source_provenance = node_get_provenance(source_node)
+
+ if target_node.get(target_key) is None:
+ target_node[target_key] = []
+
+ source_list = list_copy(source_list)
+ target_list = target_node[target_key]
+
+ target_list.extend(source_list)
+
+ if not target_provenance.members.get(target_key):
+ target_provenance.members[target_key] = source_provenance.members[source_key].clone()
+ else:
+ target_provenance.members[target_key].elements.extend([
+ p.clone() for p in source_provenance.members[source_key].elements
+ ])
+
+ return True
+
+
+# composite_list_overwrite
+#
+# Internal helper for list composition
+#
+# Args:
+# target_node (dict): A simple dictionary
+# target_key (dict): The key indicating a literal array to overwrite
+# source_node (dict): Another simple dictionary
+# source_key (str): The key indicating an array to overwrite the target with
+#
+# Returns:
+# (bool): True if a source list was found and compositing occurred
+#
+def composite_list_overwrite(target_node, target_key, source_node, source_key):
+
+ # We need to handle the legitimate case of overwriting a list with an empty
+ # list, hence the slightly odd default_value of [None] rather than [].
+ source_list = node_get(source_node, list, source_key, default_value=[None])
+ if source_list == [None]:
+ return False
+
+ target_provenance = node_get_provenance(target_node)
+ source_provenance = node_get_provenance(source_node)
+
+ target_node[target_key] = list_copy(source_list)
+ target_provenance.members[target_key] = source_provenance.members[source_key].clone()
+
+ return True
+
+
+# composite_list():
+#
+# Composite the source value onto the target value, if either
+# sides are lists, or dictionaries containing list compositing directives
+#
+# Args:
+# target_node (dict): A simple dictionary
+# source_node (dict): Another simple dictionary
+# key (str): The key to compose on
+#
+# Returns:
+# (bool): True if both sides were logical lists
+#
+# Raises:
+# (LoadError): If one side was a logical list and the other was not
+#
+def composite_list(target_node, source_node, key):
+ target_value = target_node.get(key)
+ source_value = source_node[key]
+
+ target_key_provenance = node_get_provenance(target_node, key)
+ source_key_provenance = node_get_provenance(source_node, key)
+
+ # Whenever a literal list is encountered in the source, it
+ # overwrites the target values and provenance completely.
+ #
+ if isinstance(source_value, list):
+
+ source_provenance = node_get_provenance(source_node)
+ target_provenance = node_get_provenance(target_node)
+
+ # Assert target type
+ if not (target_value is None or
+ isinstance(target_value, list) or
+ is_composite_list(target_value)):
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}: List cannot overwrite value at: {}"
+ .format(source_key_provenance, target_key_provenance))
+
+ composite_list_overwrite(target_node, key, source_node, key)
+ return True
+
+ # When a composite list is encountered in the source, then
+ # multiple outcomes can occur...
+ #
+ elif is_composite_list(source_value):
+
+ # If there is nothing there, then the composite list
+    # is copied in its entirety as is, and preserved
+ # for later composition
+ #
+ if target_value is None:
+ source_provenance = node_get_provenance(source_node)
+ target_provenance = node_get_provenance(target_node)
+
+ target_node[key] = node_copy(source_value)
+ target_provenance.members[key] = source_provenance.members[key].clone()
+
+ # If the target is a literal list, then composition
+ # occurs directly onto that target, leaving the target
+ # as a literal list to overwrite anything in later composition
+ #
+ elif isinstance(target_value, list):
+ composite_list_overwrite(target_node, key, source_value, '(=)')
+ composite_list_prepend(target_node, key, source_value, '(<)')
+ composite_list_append(target_node, key, source_value, '(>)')
+
+ # If the target is a composite list, then composition
+ # occurs in the target composite list, and the composite
+ # target list is preserved in dictionary form for further
+ # composition.
+ #
+ elif is_composite_list(target_value):
+
+ if composite_list_overwrite(target_value, '(=)', source_value, '(=)'):
+
+ # When overwriting a target with composition directives, remove any
+ # existing prepend/append directives in the target before adding our own
+ target_provenance = node_get_provenance(target_value)
+
+ for directive in ['(<)', '(>)']:
+ try:
+ del target_value[directive]
+ del target_provenance.members[directive]
+ except KeyError:
+ # Ignore errors from deletion of non-existing keys
+ pass
+
+ # Prepend to the target prepend array, and append to the append array
+ composite_list_prepend(target_value, '(<)', source_value, '(<)')
+ composite_list_append(target_value, '(>)', source_value, '(>)')
+
+ else:
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}: List cannot overwrite value at: {}"
+ .format(source_key_provenance, target_key_provenance))
+
+ # We handled list composition in some way
+ return True
+
+ # Source value was not a logical list
+ return False
+
+
+# composite_dict():
+#
+# Composites values in target with values from source
+#
+# Args:
+# target (dict): A simple dictionary
+# source (dict): Another simple dictionary
+#
+# Raises: CompositeError
+#
+# Unlike the dictionary update() method, nested values in source
+# will not obsolete entire subdictionaries in target, instead both
+# dictionaries will be recursed and a composition of both will result
+#
+# This is useful for overriding configuration files and element
+# configurations.
+#
+def composite_dict(target, source, path=None):
+ target_provenance = ensure_provenance(target)
+ source_provenance = ensure_provenance(source)
+
+ for key, source_value in node_items(source):
+
+ # Track the full path of keys, only for raising CompositeError
+ if path:
+ thispath = path + '.' + key
+ else:
+ thispath = key
+
+ # Handle list composition separately
+ if composite_list(target, source, key):
+ continue
+
+ target_value = target.get(key)
+
+ if isinstance(source_value, collections.abc.Mapping):
+
+ # Handle creating new dicts on target side
+ if target_value is None:
+ target_value = {}
+ target[key] = target_value
+
+ # Give the new dict provenance
+ value_provenance = source_value.get(PROVENANCE_KEY)
+ if value_provenance:
+ target_value[PROVENANCE_KEY] = value_provenance.clone()
+
+ # Add a new provenance member element to the containing dict
+ target_provenance.members[key] = source_provenance.members[key]
+
+ if not isinstance(target_value, collections.abc.Mapping):
+ raise CompositeTypeError(thispath, type(target_value), type(source_value))
+
+ # Recurse into matching dictionary
+ composite_dict(target_value, source_value, path=thispath)
+
+ else:
+
+ if target_value is not None:
+
+ # Exception here: depending on how strings were declared ruamel may
+ # use a different type, but for our purposes, any stringish type will do.
+ if not (is_ruamel_str(source_value) and is_ruamel_str(target_value)) \
+ and not isinstance(source_value, type(target_value)):
+ raise CompositeTypeError(thispath, type(target_value), type(source_value))
+
+ # Overwrite simple values, lists and mappings have already been handled
+ target_provenance.members[key] = source_provenance.members[key].clone()
+ target[key] = source_value
+
+
+# Like composite_dict(), but raises an all purpose LoadError for convenience
+#
+def composite(target, source):
+ assert hasattr(source, 'get')
+
+ source_provenance = node_get_provenance(source)
+ try:
+ composite_dict(target, source)
+ except CompositeTypeError as e:
+ error_prefix = ""
+ if source_provenance:
+ error_prefix = "{}: ".format(source_provenance)
+ raise LoadError(LoadErrorReason.ILLEGAL_COMPOSITE,
+ "{}Expected '{}' type for configuration '{}', instead received '{}'"
+ .format(error_prefix,
+ e.expected_type.__name__,
+ e.path,
+ e.actual_type.__name__)) from e
+
+
+# Like composite(target, source), but where target overrides source instead.
+#
+def composite_and_move(target, source):
+ composite(source, target)
+
+ to_delete = [key for key, _ in node_items(target) if key not in source]
+ for key, value in source.items():
+ target[key] = value
+ for key in to_delete:
+ del target[key]
+
+
+# SanitizedDict is an OrderedDict that is dumped as unordered mapping.
+# This provides deterministic output for unordered mappings.
+#
+class SanitizedDict(collections.OrderedDict):
+ pass
+
+
+RoundTripRepresenter.add_representer(SanitizedDict,
+ SafeRepresenter.represent_dict)
+
+
+# Types we can short-circuit in node_sanitize for speed.
+__SANITIZE_SHORT_CIRCUIT_TYPES = (int, float, str, bool, tuple)
+
+
+# node_sanitize()
+#
+# Returns an alphabetically ordered recursive copy
+# of the source node with internal provenance information stripped.
+#
+# Only dicts are ordered, list elements are left in order.
+#
+def node_sanitize(node):
+ # Short-circuit None which occurs ca. twice per element
+ if node is None:
+ return node
+
+ node_type = type(node)
+ # Next short-circuit integers, floats, strings, booleans, and tuples
+ if node_type in __SANITIZE_SHORT_CIRCUIT_TYPES:
+ return node
+ # Now short-circuit lists. Note this is only for the raw list
+ # type, CommentedSeq and others get caught later.
+ elif node_type is list:
+ return [node_sanitize(elt) for elt in node]
+
+ # Finally dict, and other Mappings need special handling
+ if node_type is dict or isinstance(node, collections.abc.Mapping):
+ result = SanitizedDict()
+
+ key_list = [key for key, _ in node_items(node)]
+ for key in sorted(key_list):
+ result[key] = node_sanitize(node[key])
+
+ return result
+ # Catch the case of CommentedSeq and friends. This is more rare and so
+ # we keep complexity down by still using isinstance here.
+ elif isinstance(node, list):
+ return [node_sanitize(elt) for elt in node]
+
+ # Everything else (such as commented scalars) just gets returned as-is.
+ return node
+
+
+# node_validate()
+#
+# Validate the node so as to ensure the user has not specified
+# any keys which are unrecognized by buildstream (usually this
+# means a typo which would otherwise not trigger an error).
+#
+# Args:
+# node (dict): A dictionary loaded from YAML
+# valid_keys (list): A list of valid keys for the specified node
+#
+# Raises:
+# LoadError: In the case that the specified node contained
+# one or more invalid keys
+#
+def node_validate(node, valid_keys):
+
+ # Probably the fastest way to do this: https://stackoverflow.com/a/23062482
+ valid_keys = set(valid_keys)
+ valid_keys.add(PROVENANCE_KEY)
+ invalid = next((key for key in node if key not in valid_keys), None)
+
+ if invalid:
+ provenance = node_get_provenance(node, key=invalid)
+ raise LoadError(LoadErrorReason.INVALID_DATA,
+ "{}: Unexpected key: {}".format(provenance, invalid))
+
+
+# Node copying
+#
+# Unfortunately we copy nodes a *lot* and `isinstance()` is super-slow when
+# things from collections.abc get involved. The result is the following
+# intricate but substantially faster group of tuples and the use of `in`.
+#
+# If any of the {node,list}_copy routines raise a ValueError
+# then it's likely additional types need adding to these tuples.
+
+
+# These types just have their value copied
+# NOTE(review): int/float are absent here — presumably roundtrip-loaded
+# scalars arrive as ruamel scalar-string types; confirm before relying on it.
+__QUICK_TYPES = (str, bool,
+                 yaml.scalarstring.PreservedScalarString,
+                 yaml.scalarstring.SingleQuotedScalarString,
+                 yaml.scalarstring.DoubleQuotedScalarString)
+
+# These types have to be iterated like a dictionary
+__DICT_TYPES = (dict, yaml.comments.CommentedMap)
+
+# These types have to be iterated like a list
+__LIST_TYPES = (list, yaml.comments.CommentedSeq)
+
+# These are the provenance types, which have to be cloned rather than any other
+# copying tactic.
+__PROVENANCE_TYPES = (Provenance, DictProvenance, MemberProvenance, ElementProvenance)
+
+# These are the directives used to compose lists, we need this because it's
+# slightly faster during the node_final_assertions checks
+__NODE_ASSERT_COMPOSITION_DIRECTIVES = ('(>)', '(<)', '(=)')
+
+
+def node_copy(source):
+ copy = {}
+ for key, value in source.items():
+ value_type = type(value)
+ if value_type in __DICT_TYPES:
+ copy[key] = node_copy(value)
+ elif value_type in __LIST_TYPES:
+ copy[key] = list_copy(value)
+ elif value_type in __PROVENANCE_TYPES:
+ copy[key] = value.clone()
+ elif value_type in __QUICK_TYPES:
+ copy[key] = value
+ else:
+ raise ValueError("Unable to be quick about node_copy of {}".format(value_type))
+
+ ensure_provenance(copy)
+
+ return copy
+
+
+def list_copy(source):
+ copy = []
+ for item in source:
+ item_type = type(item)
+ if item_type in __DICT_TYPES:
+ copy.append(node_copy(item))
+ elif item_type in __LIST_TYPES:
+ copy.append(list_copy(item))
+ elif item_type in __PROVENANCE_TYPES:
+ copy.append(item.clone())
+ elif item_type in __QUICK_TYPES:
+ copy.append(item)
+ else:
+ raise ValueError("Unable to be quick about list_copy of {}".format(item_type))
+
+ return copy
+
+
+# node_final_assertions()
+#
+# This must be called on a fully loaded and composited node,
+# after all composition has completed.
+#
+# Args:
+# node (Mapping): The final composited node
+#
+# Raises:
+# (LoadError): If any assertions fail
+#
+def node_final_assertions(node):
+ for key, value in node_items(node):
+
+ # Assert that list composition directives dont remain, this
+ # indicates that the user intended to override a list which
+ # never existed in the underlying data
+ #
+ if key in __NODE_ASSERT_COMPOSITION_DIRECTIVES:
+ provenance = node_get_provenance(node, key)
+ raise LoadError(LoadErrorReason.TRAILING_LIST_DIRECTIVE,
+ "{}: Attempt to override non-existing list".format(provenance))
+
+ value_type = type(value)
+
+ if value_type in __DICT_TYPES:
+ node_final_assertions(value)
+ elif value_type in __LIST_TYPES:
+ list_final_assertions(value)
+
+
+def list_final_assertions(values):
+ for value in values:
+ value_type = type(value)
+
+ if value_type in __DICT_TYPES:
+ node_final_assertions(value)
+ elif value_type in __LIST_TYPES:
+ list_final_assertions(value)
+
+
+# assert_symbol_name()
+#
+# A helper function to check if a loaded string is a valid symbol
+# name and to raise a consistent LoadError if not. For strings which
+# are required to be symbols.
+#
+# Args:
+# provenance (Provenance): The provenance of the loaded symbol, or None
+# symbol_name (str): The loaded symbol name
+# purpose (str): The purpose of the string, for an error message
+# allow_dashes (bool): Whether dashes are allowed for this symbol
+#
+# Raises:
+# LoadError: If the symbol_name is invalid
+#
+# Note that dashes are generally preferred for variable names and
+# usage in YAML, but things such as option names which will be
+# evaluated with jinja2 cannot use dashes.
+#
+def assert_symbol_name(provenance, symbol_name, purpose, *, allow_dashes=True):
+ valid_chars = string.digits + string.ascii_letters + '_'
+ if allow_dashes:
+ valid_chars += '-'
+
+ valid = True
+ if not symbol_name:
+ valid = False
+ elif any(x not in valid_chars for x in symbol_name):
+ valid = False
+ elif symbol_name[0] in string.digits:
+ valid = False
+
+ if not valid:
+ detail = "Symbol names must contain only alphanumeric characters, " + \
+ "may not start with a digit, and may contain underscores"
+ if allow_dashes:
+ detail += " or dashes"
+
+ message = "Invalid symbol name for {}: '{}'".format(purpose, symbol_name)
+ if provenance is not None:
+ message = "{}: {}".format(provenance, message)
+
+ raise LoadError(LoadErrorReason.INVALID_SYMBOL_NAME,
+ message, detail=detail)
diff --git a/buildstream/element.py b/buildstream/element.py
index 07745dbf0..0d165370c 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -86,6 +86,7 @@ import shutil
import string
from . import _yaml
+from . import _yaml_roundtrip
from ._variables import Variables
from ._versions import BST_CORE_ARTIFACT_VERSION
from ._exceptions import BstError, LoadError, LoadErrorReason, ImplError, \
@@ -1750,35 +1751,35 @@ class Element(Plugin):
shutil.copyfile(log_filename, self._build_log_path)
# Store public data
- _yaml.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml'))
+ _yaml_roundtrip.dump(_yaml.node_sanitize(self.__dynamic_public), os.path.join(metadir, 'public.yaml'))
# Store result
build_result_dict = {"success": self.__build_result[0], "description": self.__build_result[1]}
if self.__build_result[2] is not None:
build_result_dict["detail"] = self.__build_result[2]
- _yaml.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml'))
+ _yaml_roundtrip.dump(build_result_dict, os.path.join(metadir, 'build-result.yaml'))
# ensure we have cache keys
self._assemble_done()
# Store keys.yaml
- _yaml.dump(_yaml.node_sanitize({
+ _yaml_roundtrip.dump(_yaml.node_sanitize({
'strong': self._get_cache_key(),
'weak': self._get_cache_key(_KeyStrength.WEAK),
}), os.path.join(metadir, 'keys.yaml'))
# Store dependencies.yaml
- _yaml.dump(_yaml.node_sanitize({
+ _yaml_roundtrip.dump(_yaml.node_sanitize({
e.name: e._get_cache_key() for e in self.dependencies(Scope.BUILD)
}), os.path.join(metadir, 'dependencies.yaml'))
# Store workspaced.yaml
- _yaml.dump(_yaml.node_sanitize({
+ _yaml_roundtrip.dump(_yaml.node_sanitize({
'workspaced': bool(self._get_workspace())
}), os.path.join(metadir, 'workspaced.yaml'))
# Store workspaced-dependencies.yaml
- _yaml.dump(_yaml.node_sanitize({
+ _yaml_roundtrip.dump(_yaml.node_sanitize({
'workspaced-dependencies': [
e.name for e in self.dependencies(Scope.BUILD)
if e._get_workspace()
diff --git a/buildstream/plugintestutils/runcli.py b/buildstream/plugintestutils/runcli.py
index 5dbecdd60..558f8f964 100644
--- a/buildstream/plugintestutils/runcli.py
+++ b/buildstream/plugintestutils/runcli.py
@@ -52,6 +52,8 @@ from _pytest.capture import MultiCapture, FDCapture, FDCaptureBinary
# Import the main cli entrypoint
from buildstream._frontend import cli as bst_cli
from buildstream import _yaml
+from buildstream import _yaml_roundtrip
+
# Special private exception accessor, for test case purposes
from buildstream._exceptions import BstError, get_last_exception, get_last_task_error
@@ -542,7 +544,7 @@ class CliIntegration(Cli):
_yaml.composite_dict(base_config, project_config)
base_config = _yaml.node_sanitize(base_config)
- _yaml.dump(base_config, project_filename)
+ _yaml_roundtrip.dump(base_config, project_filename)
else:
@@ -645,6 +647,6 @@ def configured(directory, config=None):
# Dump it and yield the filename for test scripts to feed it
# to buildstream as an artument
filename = os.path.join(directory, "buildstream.conf")
- _yaml.dump(config, filename)
+ _yaml_roundtrip.dump(config, filename)
yield filename
diff --git a/buildstream/source.py b/buildstream/source.py
index b0ba1c744..31156854f 100644
--- a/buildstream/source.py
+++ b/buildstream/source.py
@@ -166,7 +166,7 @@ from collections.abc import Mapping
from contextlib import contextmanager
from . import Plugin, Consistency
-from . import _yaml, utils
+from . import _yaml, utils, _yaml_roundtrip
from ._exceptions import BstError, ImplError, ErrorDomain
from ._loader.metasource import MetaSource
from ._projectrefs import ProjectRefStorage
@@ -889,7 +889,8 @@ class Source(Plugin):
# Save the ref in the originating file
#
try:
- _yaml.dump(provenance.toplevel, provenance.filename.name)
+            # Use the roundtrip dump here so that tracking preserves the
+            # original formatting of the user's file
+ _yaml_roundtrip.dump(provenance.toplevel, provenance.filename.name)
except OSError as e:
raise SourceError("{}: Error saving source reference to '{}': {}"
.format(self, provenance.filename.name, e),