diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2020-07-22 11:09:41 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2020-07-22 11:09:41 +0000 |
commit | e657119f258153f16e12a17ab574524b3f614cbc (patch) | |
tree | 1a3bb04c607ed1475c11b8f23cfe8b9d37c6230f | |
parent | 8d7ee1885e2c458c4f2edf113e5a2d990235f708 (diff) | |
parent | dcaaa0be1924a503fe05c63112cfcaf3335eed05 (diff) | |
download | buildstream-e657119f258153f16e12a17ab574524b3f614cbc.tar.gz |
Merge branch 'tristan/bst-1/partial-variables' into 'bst-1'
Variables non-recursive and lazy resolution refactor (bst-1)
See merge request BuildStream/buildstream!2001
19 files changed, 812 insertions, 157 deletions
diff --git a/buildstream/_exceptions.py b/buildstream/_exceptions.py index 8f3f17b9d..aeacf8dcd 100644 --- a/buildstream/_exceptions.py +++ b/buildstream/_exceptions.py @@ -217,7 +217,7 @@ class LoadErrorReason(Enum): RECURSIVE_INCLUDE = 21 # A recursive variable has been encountered - RECURSIVE_VARIABLE = 22 + CIRCULAR_REFERENCE_VARIABLE = 22 # An attempt so set the value of a protected variable PROTECTED_VARIABLE_REDEFINED = 23 diff --git a/buildstream/_frontend/widget.py b/buildstream/_frontend/widget.py index 80fffd201..88b7350b4 100644 --- a/buildstream/_frontend/widget.py +++ b/buildstream/_frontend/widget.py @@ -387,7 +387,7 @@ class LogLine(Widget): # Variables if "%{vars" in format_: - variables = _yaml.node_sanitize(element._Element__variables.variables) + variables = dict(element._Element__variables) line = p.fmt_subst( line, 'vars', yaml.round_trip_dump(variables, default_flow_style=False, allow_unicode=True)) diff --git a/buildstream/_variables.py b/buildstream/_variables.py index 1a52b5680..322f3f7b7 100644 --- a/buildstream/_variables.py +++ b/buildstream/_variables.py @@ -1,5 +1,6 @@ # -# Copyright (C) 2016 Codethink Limited +# Copyright (C) 2020 Codethink Limited +# Copyright (C) 2019 Bloomberg L.P. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -16,184 +17,657 @@ # # Authors: # Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> +# Daniel Silverstone <daniel.silverstone@codethink.co.uk> +# Benjamin Schubert <bschubert@bloomberg.net> import re +import sys +import itertools from ._exceptions import LoadError, LoadErrorReason from . import _yaml -# Variables are allowed to have dashes here +######################################################## +# Understanding Value Expressions # +######################################################## +# +# This code uses the term "value expression" a lot to refer to `str` objects +# which have references to variables in them, and also to `list` objects which +# are effectively broken down strings. +# +# Ideally we would have a ValueExpression type in order to make this more +# comprehensive, but this would unfortunately introduce unnecessary overhead, +# making the code measurably slower. +# +# Value Expression Strings +# ------------------------ +# Strings which contain variables in them, such as: +# +# "My name is %{username}, good day." +# +# +# Parsed Value Expression Lists +# ----------------------------- +# Using `re.split()` from python's regular expression implementation, we +# parse the list using our locally defined VALUE_EXPRESSION_REGEX, which +# breaks down the string into a list of "literal" and "variable" components. +# +# The "literal" components are literal portions of the string which need +# no substitution, while the "variable" components represent variable names +# which need to be substituted with their corresponding resolved values. +# +# The parsed variable expressions have the following properties: +# +# * They are sparse, some of the "literal" values contain zero length +# strings which can be ignored. +# +# * Literal values are found only at even indices of the parsed +# variable expression +# +# * Variable names are found only at odd indices +# +# The above example "My name is %{username}, good day." is broken down +# into a parsed value expression as follows: +# +# [ +# "My name is ", # <- Index 0, literal value +# "username", # <- Index 1, variable name, '%{ ... }' discarded +# ", good day." # <- Index 2, literal value +# ] # -_VARIABLE_MATCH = r'\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}' + +# Maximum recursion depth using the fast (recursive) variable resolution +# algorithm. +# +MAX_RECURSION_DEPTH = 200 + +# Regular expression used to parse %{variables} in value expressions +# +# Note that variables are allowed to have dashes +# +VALUE_EXPRESSION_REGEX = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}") + +# Cache for the parsed expansion strings. +# +VALUE_EXPRESSION_CACHE = { + # Prime the cache with the empty string since otherwise that can + # cause issues with the parser, complications to which cause slowdown + "": [""], +} -# The Variables helper object will resolve the variable references in -# the given dictionary, expecting that any dictionary values which contain -# variable references can be resolved from the same dictionary. +# Variables() +# +# The Variables object resolves the variable references in the given MappingNode, +# expecting that any dictionary values which contain variable references can be +# resolved from the same dictionary. # # Each Element creates its own Variables instance to track the configured # variable settings for the element. # +# Notably, this object is delegated the responsibility of expanding +# variables in yaml Node hierarchies and substituting variables in strings +# in the context of a given Element's variable configuration. +# # Args: # node (dict): A node loaded and composited with yaml tools # # Raises: -# LoadError, if unresolved variables occur. +# LoadError, if unresolved variables, or cycles in resolution, occur. # -class Variables(): +class Variables: + ################################################################# + # Dunder Methods # + ################################################################# def __init__(self, node): - self.original = node - self.variables = self._resolve(node) + # The original MappingNode, we need to keep this + # around for proper error reporting. + # + self._original = node - # subst(): + # The value map, this dictionary contains either unresolved + # value expressions, or resolved values. + # + # Each mapping value is a list, in the case that the value + # is resolved, then the list is only 1 element long. + # + self._values = self._init_values(node) + + # __getitem__() # - # Substitutes any variables in 'string' and returns the result. + # Fetches a resolved variable by it's name, allows + # addressing the Variables instance like a dictionary. # # Args: - # (string): The string to substitute + # name (str): The name of the variable # # Returns: - # (string): The new string with any substitutions made + # (str): The resolved variable value # # Raises: - # LoadError, if the string contains unresolved variable references. - # - def subst(self, string): - substitute, unmatched, _ = self._subst(string, self.variables) - unmatched = list(set(unmatched)) - if unmatched: - if len(unmatched) == 1: - message = "Unresolved variable '{var}'".format(var=unmatched[0]) - else: - message = "Unresolved variables: " - for unmatch in unmatched: - if unmatched.index(unmatch) > 0: - message += ', ' - message += unmatch + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + def __getitem__(self, name): + if name not in self._values: + raise KeyError(name) - raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, message) + return self._expand_var(name) - return substitute + # __contains__() + # + # Checks whether a given variable exists, allows + # supporting `if 'foo' in variables` expressions. + # + # Args: + # name (str): The name of the variable to check for + # + # Returns: + # (bool): True if `name` is a valid variable + # + def __contains__(self, name): + return name in self._values - def _subst(self, string, variables): + # __iter__() + # + # Provide an iterator for all variables effective values + # + # Returns: + # (Iterator[Tuple[str, str]]) + # + def __iter__(self): + return _VariablesIterator(self) - def subst_callback(match): - nonlocal variables - nonlocal unmatched - nonlocal matched + ################################################################# + # Public API # + ################################################################# - token = match.group(0) - varname = match.group(1) + # check() + # + # Assert that all variables declared on this Variables + # instance have been resolved properly, and reports errors + # for undefined references and circular references. + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + def check(self): - value = _yaml.node_get(variables, str, varname, default_value=None) - if value is not None: - # We have to check if the inner string has variables - # and return unmatches for those - unmatched += re.findall(_VARIABLE_MATCH, value) - matched += [varname] - else: - # Return unmodified token - unmatched += [varname] - value = token + # Just resolve all variables. + for key in self._values.keys(): + self._expand_var(key) - return value + # get() + # + # Expand definition of variable by name. If the variable is not + # defined, it will return None instead of failing. + # + # Args: + # name (str): Name of the variable to expand + # + # Returns: + # (str|None): The expanded value for the variable or None variable was not defined. + # + def get(self, name): + if name not in self._values: + return None + return self[name] - matched = [] - unmatched = [] - replacement = re.sub(_VARIABLE_MATCH, subst_callback, string) + # subst(): + # + # Substitutes any variables in 'string' and returns the result. + # + # Args: + # string (str): The string to substitute + # provenance (Provenance): The provenance of the string + # + # Returns: + # (str): The new string with any substitutions made + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + def subst(self, string, provenance): + value_expression = _parse_value_expression(string) + return self._expand_value_expression(value_expression, provenance) - return (replacement, unmatched, matched) + ################################################################# + # Private API # + ################################################################# - # Variable resolving code + # _init_values() # - # Here we substitute variables for values (resolve variables) repeatedly - # in a dictionary, each time creating a new dictionary until there is no - # more unresolved variables to resolve, or, until resolving further no - # longer resolves anything, in which case we throw an exception. - def _resolve(self, node): - variables = node - + # Initialize the table of values. + # + # The value table is a dictionary keyed by the variable names where + # the values are value expressions (lists) which are initially unresolved. + # + # Value expressions are later resolved on demand and replaced in this + # table with single element lists. + # + # Args: + # node (dict): The original variables mapping node + # + # Returns: + # (dict): A dictionary of value expressions (lists) + # + def _init_values(self, node): # Special case, if notparallel is specified in the variables for this # element, then override max-jobs to be 1. # Initialize it as a string as all variables are processed as strings. # - if _yaml.node_get(variables, bool, 'notparallel', default_value=False): - variables['max-jobs'] = str(1) + if _yaml.node_get(node, bool, 'notparallel', default_value=False): + node['max-jobs'] = str(1) - # Resolve the dictionary once, reporting the new dictionary with things - # substituted in it, and reporting unmatched tokens. - # - def resolve_one(variables): - unmatched = [] - resolved = {} + ret = {} + for key in node.keys(): + value = _yaml.node_get(node, str, key) + ret[sys.intern(key)] = _parse_value_expression(value) - for key, value in _yaml.node_items(variables): + return ret - # Ensure stringness of the value before substitution - value = _yaml.node_get(variables, str, key) + # _expand_var() + # + # Expand and cache a variable definition. + # + # This will try the fast, recursive path first and fallback to + # the slower iterative codepath. + # + # Args: + # name (str): Name of the variable to expand + # + # Returns: + # (str): The expanded value of variable + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + def _expand_var(self, name): + try: + return self._fast_expand_var(name) + except (KeyError, RecursionError): + return self._slow_expand_var(name) - resolved_var, item_unmatched, matched = self._subst(value, variables) + # _expand_value_expression() + # + # Expands a value expression + # + # This will try the fast, recursive path first and fallback to + # the slower iterative codepath. + # + # Args: + # value_expression (list): The parsed value expression to be expanded + # provenance (Provenance): The provenance of the value expression + # + # Returns: + # (str): The expanded value expression + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + def _expand_value_expression(self, value_expression, provenance): + try: + return self._fast_expand_value_expression(value_expression) + except (KeyError, RecursionError): + return self._slow_expand_value_expression(None, value_expression, provenance) - if _wrap_variable(key) in resolved_var: - referenced_through = find_recursive_variable(key, matched, variables) - raise LoadError(LoadErrorReason.RECURSIVE_VARIABLE, - "{}: ".format(_yaml.node_get_provenance(variables, key)) + - ("Variable '{}' expands to contain a reference to itself. " + - "Perhaps '{}' contains '{}").format(key, referenced_through, _wrap_variable(key))) + ################################################################# + # Resolution algorithm: fast path # + ################################################################# - resolved[key] = resolved_var - unmatched += item_unmatched + # _fast_expand_var() + # + # Fast, recursive path for variable expansion + # + # Args: + # name (str): Name of the variable to expand + # counter (int): Number of recursion cycles (used only in recursion) + # + # Returns: + # (str): The expanded value of variable + # + # Raises: + # (KeyError): If a reference to an undefined variable is encountered + # (RecursionError): If MAX_RECURSION_DEPTH recursion cycles is reached + # + def _fast_expand_var(self, name, counter=0): - # Carry over provenance - resolved[_yaml.PROVENANCE_KEY] = variables[_yaml.PROVENANCE_KEY] - return (resolved, unmatched) + value_expression = self._values[name] + if len(value_expression) > 1: + sub = self._fast_expand_value_expression(value_expression, counter) + value_expression = [sys.intern(sub)] + self._values[name] = value_expression - # Resolve it until it's resolved or broken + return value_expression[0] + + # _fast_expand_value_expression() + # + # Fast, recursive path for value expression expansion. + # + # Args: + # value_expression (list): The parsed value expression to be expanded + # counter (int): Number of recursion cycles (used only in recursion) + # + # Returns: + # (str): The expanded value expression + # + # Raises: + # (KeyError): If a reference to an undefined variable is encountered + # (RecursionError): If MAX_RECURSION_DEPTH recursion cycles is reached + # + def _fast_expand_value_expression(self, value_expression, counter=0): + if counter > MAX_RECURSION_DEPTH: + raise RecursionError() + + acc = [] + for idx, value in enumerate(value_expression): + if (idx % 2) == 0: + acc.append(value) + else: + acc.append(self._fast_expand_var(value, counter + 1)) + + return "".join(acc) + + ################################################################# + # Resolution algorithm: slow path # + ################################################################# + + # _slow_expand_var() + # + # Slow, iterative path for variable expansion with full error reporting + # + # Args: + # name (str): Name of the variable to expand + # + # Returns: + # (str): The expanded value of variable + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + def _slow_expand_var(self, name): + value_expression = self._get_checked_value_expression(name, None, None) + if len(value_expression) > 1: + expanded = self._slow_expand_value_expression(name, value_expression, None) + value_expression = [sys.intern(expanded)] + self._values[name] = value_expression + + return value_expression[0] + + # _slow_expand_value_expression() + # + # Slow, iterative path for value expression expansion with full error reporting + # + # Note that either `varname` or `node` must be provided, these are used to + # identify the provenance of this value expression (which might be the value + # of a variable, or a value expression found elswhere in project YAML which + # needs to be substituted). + # + # Args: + # varname (str|None): The variable name associated with this value expression, if any + # value_expression (list): The parsed value expression to be expanded + # provenance (Provenance): The provenance who is asking for an expansion + # + # Returns: + # (str): The expanded value expression + # + # Raises: + # (LoadError): In the case of an undefined variable or + # a cyclic variable reference + # + def _slow_expand_value_expression(self, varname, value_expression, provenance): + idx = 0 + resolved_value = None + + # We will collect the varnames and value expressions which need + # to be resolved in the loop, sorted by dependency, and then + # finally reverse through them resolving them one at a time # - resolved = variables - unmatched = ['dummy'] - last_unmatched = ['dummy'] - while unmatched: - resolved, unmatched = resolve_one(resolved) - - # Lists of strings can be compared like this - if unmatched == last_unmatched: - # We've got the same result twice without matching everything, - # something is undeclared or cyclic, compose a summary. + resolved_varnames = [] + resolved_values = [] + + step = ResolutionStep(varname, value_expression, None) + while step: + # Keep a hold of the current overall step + this_step = step + step = step.prev + + # Check for circular dependencies + this_step.check_circular(self._original) + + for idx, value in enumerate(this_step.value_expression): + + # Skip literal parts of the value expression + if (idx % 2) == 0: + continue + + iter_value_expression = self._get_checked_value_expression(value, this_step.referee, provenance) + + # Queue up this value. # - summary = '' - for unmatch in set(unmatched): - for var, provenance in self._find_references(unmatch): - line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}\n" - summary += line.format(unmatched=unmatch, variable=var, provenance=provenance) + # Even if the value was already resolved, we need it in context to resolve + # previously enqueued variables + resolved_values.append(iter_value_expression) + resolved_varnames.append(value) + + # Queue up the values dependencies. + # + if len(iter_value_expression) > 1: + new_step = ResolutionStep(value, iter_value_expression, this_step) + + # Link it to the end of the stack + new_step.prev = step + step = new_step + + # We've now constructed the dependencies queue such that + # later dependencies are on the right, we can now safely peddle + # backwards and the last (leftmost) resolved value is the one + # we want to return. + # + for iter_value_expression, resolved_varname in zip(reversed(resolved_values), reversed(resolved_varnames)): + + # Resolve as needed + # + if len(iter_value_expression) > 1: + resolved_value = self._resolve_value_expression(iter_value_expression) + iter_value_expression = [resolved_value] + if resolved_varname is not None: + self._values[resolved_varname] = iter_value_expression - raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, - "Failed to resolve one or more variable:\n{}".format(summary)) + return resolved_value - last_unmatched = unmatched + # _get_checked_value_expression() + # + # Fetches a value expression from the value table and raises a user + # facing error if the value is undefined. + # + # Args: + # varname (str): The variable name to fetch + # referee (str): The variable name referring to `varname`, or None + # provenance (Provenance): The provenance for which we need to resolve `name` + # + # Returns: + # (list): The value expression for varname + # + # Raises: + # (LoadError): An appropriate error in case of undefined variables + # + def _get_checked_value_expression(self, varname, referee=None, provenance=None): + + # + # Fetch the value and detect undefined references + # + try: + return self._values[varname] + except KeyError as e: + + # Either the provenance is the toplevel calling provenance, + # or it is the provenance of the direct referee + if referee: + p = _yaml.node_get_provenance(self._original, referee) + else: + p = provenance - return resolved + error_message = "Reference to undefined variable '{}'".format(varname) + if p: + error_message = "{}: {}".format(p, error_message) + raise LoadError(LoadErrorReason.UNRESOLVED_VARIABLE, error_message) from e - # Helper function to fetch information about the node referring to a variable + # _resolve_value_expression() # - def _find_references(self, varname): - fullname = _wrap_variable(varname) - for key, value in _yaml.node_items(self.original): - if fullname in value: - provenance = _yaml.node_get_provenance(self.original, key) - yield (key, provenance) + # Resolves a value expression with the expectation that all + # variables within this value expression have already been + # resolved and updated in the Variables._values table. + # + # This is used as a part of the iterative resolution codepath, + # where value expressions are first sorted by dependency before + # being resolved in one go. + # + # Args: + # value_expression (list): The value expression to resolve + # + # Returns: + # (str): The resolved value expression + # + def _resolve_value_expression(self, value_expression): + acc = [] + + for idx, value in enumerate(value_expression): + if (idx % 2) == 0: + acc.append(value) + else: + acc.append(self._values[value][0]) + + return "".join(acc) + + +# ResolutionStep() +# +# The context for a single iteration in variable resolution. +# +# Args: +# referee (str): The name of the referring variable +# value_expression (list): The parsed value expression to be expanded +# parent (ResolutionStep): The parent ResolutionStep +# +class ResolutionStep: + + def __init__(self, referee, value_expression, parent): + self.referee = referee + self.value_expression = value_expression + self.parent = parent + self.prev = None + + # check_circular() + # + # Check for circular references in this step. + # + # Args: + # original_values (MappingNode): The original MappingNode for the Variables + # + # Raises: + # (LoadError): Will raise a user facing LoadError with + # LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case + # circular references were encountered. + # + def check_circular(self, original_values): + step = self.parent + while step: + if self.referee is step.referee: + self._raise_circular_reference_error(step, original_values) + step = step.parent + + # _raise_circular_reference_error() + # + # Helper function to construct a full report and raise the LoadError + # with LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE. + # + # Args: + # conflict (ResolutionStep): The resolution step which conflicts with this step + # original_values (MappingNode): The original node to extract provenances from + # + # Raises: + # (LoadError): Unconditionally + # + def _raise_circular_reference_error(self, conflict, original_values): + error_lines = [] + step = self + + while step is not conflict: + if step.parent: + referee = step.parent.referee + else: + referee = self.referee + + provenance = _yaml.node_get_provenance(original_values, referee) + error_lines.append("{}: Variable '{}' refers to variable '{}'".format(provenance, referee, step.referee)) + step = step.parent + + raise LoadError(LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE, + "Circular dependency detected on variable '{}'".format(self.referee), + detail="\n".join(reversed(error_lines))) + + +# _parse_value_expression() +# +# Tries to fetch the parsed value expression from the cache, parsing and +# caching value expressions on demand and returns the parsed value expression. +# +# Args: +# value_expression (str): The value expression in string form to parse +# +# Returns: +# (list): The parsed value expression in list form. +# +def _parse_value_expression(value_expression): + + try: + return VALUE_EXPRESSION_CACHE[value_expression] + except KeyError: + # This use of the regex turns a string like "foo %{bar} baz" into + # a list ["foo ", "bar", " baz"] + # + # The result is a parsed value expression, where even indicies + # contain literal parts of the value and odd indices contain + # variable names which need to be replaced by resolved variables. + # + splits = VALUE_EXPRESSION_REGEX.split(value_expression) + + # Optimize later routines by discarding any unnecessary trailing + # empty strings. + # + if splits[-1] == '': + del splits[-1] + + # We intern the string parts to try and reduce the memory impact + # of the cache. + # + ret = [sys.intern(s) for s in splits] + + # Cache and return the value expression + # + VALUE_EXPRESSION_CACHE[value_expression] = ret + return ret + +# Iterator for all flatten variables. +# Used by Variables.__iter__ +class _VariablesIterator: -def find_recursive_variable(variable, matched_variables, all_vars): - matched_values = (_yaml.node_get(all_vars, str, key) for key in matched_variables) - for key, value in zip(matched_variables, matched_values): - if _wrap_variable(variable) in value: - return key - else: - return None + def __init__(self, variables): + self._variables = variables + self._iter = iter(variables._values) + def __iter__(self): + return self -def _wrap_variable(var): - return "%{" + var + "}" + def __next__(self): + name = next(self._iter) + return name, self._variables._expand_var(name) diff --git a/buildstream/element.py b/buildstream/element.py index ef871b7ad..4942f5560 100644 --- a/buildstream/element.py +++ b/buildstream/element.py @@ -235,6 +235,8 @@ class Element(Plugin): variables = self.__extract_variables(meta) variables['element-name'] = self.name self.__variables = Variables(variables) + if not self.__is_junction: + self.__variables.check() # Collect the composited environment now that we have variables env = self.__extract_environment(meta) @@ -481,10 +483,10 @@ class Element(Plugin): name = self.node_subst_member(node, 'name') """ value = self.node_get_member(node, str, member_name, default) + provenance = _yaml.node_get_provenance(node, key=member_name) try: - return self.__variables.subst(value) + return self.__variables.subst(value, provenance) except LoadError as e: - provenance = _yaml.node_get_provenance(node, key=member_name) raise LoadError(e.reason, '{}: {}'.format(provenance, str(e))) from e def node_subst_list(self, node, member_name): @@ -507,10 +509,10 @@ class Element(Plugin): value = self.node_get_member(node, list, member_name) ret = [] for index, x in enumerate(value): + provenance = _yaml.node_get_provenance(node, key=member_name, indices=[index]) try: - ret.append(self.__variables.subst(x)) + ret.append(self.__variables.subst(x, provenance)) except LoadError as e: - provenance = _yaml.node_get_provenance(node, key=member_name, indices=[index]) raise LoadError(e.reason, '{}: {}'.format(provenance, str(e))) from e return ret @@ -549,10 +551,10 @@ class Element(Plugin): node, 'strings', [ i ]) """ value = self.node_get_list_element(node, str, member_name, indices) + provenance = _yaml.node_get_provenance(node, key=member_name, indices=indices) try: - return self.__variables.subst(value) + return self.__variables.subst(value, provenance) except LoadError as e: - provenance = _yaml.node_get_provenance(node, key=member_name, indices=indices) raise LoadError(e.reason, '{}: {}'.format(provenance, str(e))) from e def compute_manifest(self, *, include=None, exclude=None, orphans=True): @@ -884,10 +886,7 @@ class Element(Plugin): (str): The resolved value for *varname*, or None if no variable was declared with the given name. """ - if varname in self.__variables.variables: - return self.__variables.variables[varname] - - return None + return self.__variables.get(varname) ############################################################# # Private Methods used in BuildStream # @@ -1910,7 +1909,7 @@ class Element(Plugin): # (str): The string after substitutions have occurred # def _subst_string(self, value): - return self.__variables.subst(value) + return self.__variables.subst(value, None) # Returns the element whose sources this element is ultimately derived from. # @@ -2294,11 +2293,15 @@ class Element(Plugin): _yaml.node_final_assertions(element_public) # Also, resolve any variables in the public split rules directly + new_base_splits = {} for domain, splits in self.node_items(base_splits): - base_splits[domain] = [ - self.__variables.subst(split.strip()) - for split in splits - ] + new_splits = [] + for index, split in enumerate(splits): + provenance = _yaml.node_get_provenance(base_splits, key=domain, indices=[index]) + new_splits.append( + self.__variables.subst(split.strip(), provenance) + ) + base_splits[domain] = new_splits return element_public @@ -2371,7 +2374,15 @@ class Element(Plugin): if not self.__whitelist_regex: bstdata = self.get_public_data('bst') whitelist = _yaml.node_get(bstdata, list, 'overlap-whitelist', default_value=[]) - whitelist_expressions = [utils._glob2re(self.__variables.subst(exp.strip())) for exp in whitelist] + whitelist_expressions = [ + utils._glob2re( + self.__variables.subst( + exp.strip(), + _yaml.node_get_provenance(bstdata, key='overlap-whitelist', indices=[index]) + ) + ) + for index, exp in enumerate(whitelist) + ] expression = ('^(?:' + '|'.join(whitelist_expressions) + ')$') self.__whitelist_regex = re.compile(expression) return self.__whitelist_regex.match(path) or self.__whitelist_regex.match(os.path.join(os.sep, path)) diff --git a/tests/format/variables.py b/tests/format/variables.py index 098837af3..a888818e8 100644 --- a/tests/format/variables.py +++ b/tests/format/variables.py @@ -65,28 +65,107 @@ def test_overrides(cli, datafiles, tmpdir, target, varname, expected): assert result_vars[varname] == expected -@pytest.mark.datafiles(os.path.join(DATA_DIR, 'missing_variables')) -def test_missing_variable(cli, datafiles, tmpdir): - project = os.path.join(datafiles.dirname, datafiles.basename) - result = cli.run(project=project, silent=True, args=[ - 'show', '--deps', 'none', '--format', '%{config}', 'manual.bst' - ]) - result.assert_main_error(ErrorDomain.LOAD, - LoadErrorReason.UNRESOLVED_VARIABLE) +@pytest.mark.parametrize( + "element,provenance", + [ + # This test makes a reference to an undefined variable in a build command + ("manual.bst", "manual.bst [line 5 column 6]"), + # This test makes a reference to an undefined variable by another variable, + # ensuring that we validate variables even when they are unused + ("manual2.bst", "manual2.bst [line 4 column 8]"), + # This test uses a build command to refer to some variables which ultimately + # refer to an undefined variable, testing a more complex case. + ("manual3.bst", "manual3.bst [line 6 column 8]"), + ], + ids=["build-command", "variables", "complex"], +) +@pytest.mark.datafiles(os.path.join(DATA_DIR, "missing_variables")) +def test_undefined(cli, datafiles, element, provenance): + project = str(datafiles) + result = cli.run(project=project, silent=True, args=["show", "--deps", "none", "--format", "%{config}", element]) + result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE) + assert provenance in result.stderr -@pytest.mark.timeout(3, method="signal") -@pytest.mark.datafiles(os.path.join(DATA_DIR, 'cyclic_variables')) -def test_cyclic_variables(cli, datafiles): - print_warning("Performing cyclic test, if this test times out it will " + - "exit the test sequence") - project = os.path.join(datafiles.dirname, datafiles.basename) - result = cli.run(project=project, silent=True, args=[ - "build", "cyclic.bst" - ]) - result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE) +@pytest.mark.parametrize( + "element,provenances", + [ + # Test a simple a -> b and b -> a reference + ("simple-cyclic.bst", ["simple-cyclic.bst [line 4 column 5]", "simple-cyclic.bst [line 5 column 5]"]), + # Test a simple a -> b and b -> a reference with some text involved + ("cyclic.bst", ["cyclic.bst [line 5 column 10]", "cyclic.bst [line 4 column 5]"]), + # Test an indirect circular dependency + ( + "indirect-cyclic.bst", + [ + "indirect-cyclic.bst [line 5 column 5]", + "indirect-cyclic.bst [line 6 column 5]", + "indirect-cyclic.bst [line 7 column 5]", + "indirect-cyclic.bst [line 8 column 5]", + ], + ), + # Test an indirect circular dependency + ("self-reference.bst", ["self-reference.bst [line 4 column 5]"]), + ], + ids=["simple", "simple-text", "indirect", "self-reference"], +) +@pytest.mark.timeout(15, method="signal") +@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables")) +def test_circular_reference(cli, datafiles, element, provenances): + print_warning("Performing cyclic test, if this test times out it will exit the test sequence") + project = str(datafiles) + result = cli.run(project=project, silent=True, args=["build", element]) + result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE) + for provenance in provenances: + assert provenance in result.stderr def print_warning(msg): RED, END = "\033[91m", "\033[0m" print(("\n{}{}{}").format(RED, msg, END), file=sys.stderr) + + +# Test that variables which refer to eachother very deeply are +# still resolved correctly, this ensures that we are not relying +# on a recursive algorithm limited by stack depth. +# +@pytest.mark.parametrize( + "maxvars", [50, 500, 5000], +) +@pytest.mark.datafiles(os.path.join(DATA_DIR, "defaults")) +def test_deep_references(cli, datafiles, maxvars): + project = str(datafiles) + + # Generate an element with very, very many variables to resolve, + # each which expand to the value of the previous variable. + # + # The bottom variable defines a test value which we check for + # in the top variable in `bst show` output. + # + topvar = "var{}".format(maxvars) + bottomvar = "var0" + testvalue = "testvalue {}".format(maxvars) + + # Generate + variables = {"var{}".format(idx + 1): "%{var" + str(idx) + "}" for idx in range(maxvars)} + variables[bottomvar] = testvalue + element = {"kind": "manual", "variables": variables} + _yaml.dump(element, os.path.join(project, "test.bst")) + + # Run `bst show` + result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"]) + result.assert_success() + + # Test results + result_vars = _yaml.load_data(result.output) + assert result_vars[topvar] == testvalue + + +@pytest.mark.datafiles(os.path.join(DATA_DIR, "partial_context")) +def test_partial_context_junctions(cli, datafiles): + project = str(datafiles) + + result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"]) + result.assert_success() + result_vars = _yaml.load_data(result.output) + assert result_vars["eltvar"] == "/bar/foo/baz" diff --git a/tests/format/variables/cyclic_variables/cyclic.bst b/tests/format/variables/cyclic_variables/cyclic.bst index a05a40b27..38832fa86 100644 --- a/tests/format/variables/cyclic_variables/cyclic.bst +++ b/tests/format/variables/cyclic_variables/cyclic.bst @@ -2,4 +2,4 @@ kind: manual variables: a: "%{prefix}/a" - prefix: "%{a}/some_prefix/"
\ No newline at end of file + prefix: "%{a}/some_prefix/" diff --git a/tests/format/variables/cyclic_variables/indirect-cyclic.bst b/tests/format/variables/cyclic_variables/indirect-cyclic.bst new file mode 100644 index 000000000..fb06fb008 --- /dev/null +++ b/tests/format/variables/cyclic_variables/indirect-cyclic.bst @@ -0,0 +1,8 @@ +kind: manual + +variables: + foo: "%{a}" + a: "%{b}" + b: "%{c}" + c: "%{d}" + d: "%{a}" diff --git a/tests/format/variables/cyclic_variables/self-reference.bst b/tests/format/variables/cyclic_variables/self-reference.bst new file mode 100644 index 000000000..2e9829d03 --- /dev/null +++ b/tests/format/variables/cyclic_variables/self-reference.bst @@ -0,0 +1,4 @@ +kind: manual + +variables: + a: "Referencing itself with %{a}" diff --git a/tests/format/variables/cyclic_variables/simple-cyclic.bst b/tests/format/variables/cyclic_variables/simple-cyclic.bst new file mode 100644 index 000000000..806e1f390 --- /dev/null +++ b/tests/format/variables/cyclic_variables/simple-cyclic.bst @@ -0,0 +1,5 @@ +kind: manual + +variables: + a: "%{b}" + b: "%{a}" diff --git a/tests/format/variables/missing_variables/manual2.bst b/tests/format/variables/missing_variables/manual2.bst new file mode 100644 index 000000000..bd8e2baf7 --- /dev/null +++ b/tests/format/variables/missing_variables/manual2.bst @@ -0,0 +1,4 @@ +kind: manual + +variables: + test: hello %{missing} diff --git a/tests/format/variables/missing_variables/manual3.bst b/tests/format/variables/missing_variables/manual3.bst new file mode 100644 index 000000000..ff3c8d583 --- /dev/null +++ b/tests/format/variables/missing_variables/manual3.bst @@ -0,0 +1,10 @@ +kind: manual + +variables: + hello: "Hello mister %{pony}" + greeting: "The %{hello} string twice: %{hello} again" + pony: "The pony is %{undefined}" + +config: + build-commands: + - Some indirectly undefined variable %{greeting} diff --git a/tests/format/variables/partial_context/base.bst b/tests/format/variables/partial_context/base.bst new file mode 100644 index 000000000..10ce559a9 --- /dev/null +++ b/tests/format/variables/partial_context/base.bst @@ -0,0 +1,4 @@ +kind: junction +sources: +- kind: local + path: base diff --git a/tests/format/variables/partial_context/base/project.conf b/tests/format/variables/partial_context/base/project.conf new file mode 100644 index 000000000..b9f96102d --- /dev/null +++ b/tests/format/variables/partial_context/base/project.conf @@ -0,0 +1,2 @@ +name: base + diff --git a/tests/format/variables/partial_context/base/vars.yml b/tests/format/variables/partial_context/base/vars.yml new file mode 100644 index 000000000..3a91e1310 --- /dev/null +++ b/tests/format/variables/partial_context/base/vars.yml @@ -0,0 +1,2 @@ +variables: + subvar: "/bar" diff --git a/tests/format/variables/partial_context/project.conf b/tests/format/variables/partial_context/project.conf new file mode 100644 index 000000000..4e4dfe2ff --- /dev/null +++ b/tests/format/variables/partial_context/project.conf @@ -0,0 +1,7 @@ +name: test + +(@): base.bst:vars.yml + +variables: + var: "%{subvar}/foo" + diff --git a/tests/format/variables/partial_context/test.bst b/tests/format/variables/partial_context/test.bst new file mode 100644 index 000000000..8276763c3 --- /dev/null +++ b/tests/format/variables/partial_context/test.bst @@ -0,0 +1,3 @@ +kind: manual +variables: + eltvar: "%{var}/baz" diff --git a/tests/frontend/overlaps.py b/tests/frontend/overlaps.py index ad2211dde..1ea6d157c 100644 --- a/tests/frontend/overlaps.py +++ b/tests/frontend/overlaps.py @@ -2,7 +2,7 @@ import os import pytest from tests.testutils.runcli import cli from tests.testutils import generate_junction -from buildstream._exceptions import ErrorDomain +from buildstream._exceptions import ErrorDomain, LoadErrorReason from buildstream import _yaml from buildstream.plugin import CoreWarnings @@ -80,6 +80,26 @@ def test_overlaps_whitelist_on_overlapper(cli, datafiles): @pytest.mark.datafiles(DATA_DIR) +def test_overlaps_whitelist_undefined_variable(cli, datafiles): + project_dir = str(datafiles) + gen_project(project_dir, False) + result = cli.run(project=project_dir, silent=True, args=["build", "collect-whitelisted-undefined.bst"]) + + # Assert that we get the expected undefined variable error, + # and that it has the provenance we expect from whitelist-undefined.bst + # + # FIXME: In BuildStream 1, we only encounter this error later when extracting + # the variables from an artifact, and we lose the provenance. + # + # This is not a huge problem in light of the coming of BuildStream 2 and + # is probably not worth too much attention, but it is worth noting that + # this is an imperfect error message delivered at a late stage. + # + result.assert_main_error(ErrorDomain.STREAM, None) + assert "public.yaml [line 3 column 4]" in result.stderr + + +@pytest.mark.datafiles(DATA_DIR) @pytest.mark.parametrize("use_fatal_warnings", [True, False]) def test_overlaps_script(cli, datafiles, use_fatal_warnings): # Test overlaps with script element to test diff --git a/tests/frontend/overlaps/collect-whitelisted-undefined.bst b/tests/frontend/overlaps/collect-whitelisted-undefined.bst new file mode 100644 index 000000000..9572cd6a0 --- /dev/null +++ b/tests/frontend/overlaps/collect-whitelisted-undefined.bst @@ -0,0 +1,9 @@ +kind: compose + +depends: +- filename: a-whitelisted.bst + type: build +- filename: whitelist-undefined.bst + type: build +- filename: c.bst + type: build diff --git a/tests/frontend/overlaps/whitelist-undefined.bst b/tests/frontend/overlaps/whitelist-undefined.bst new file mode 100644 index 000000000..5cb0c645e --- /dev/null +++ b/tests/frontend/overlaps/whitelist-undefined.bst @@ -0,0 +1,13 @@ +kind: import +config: + source: / + target: / +depends: +- b-whitelisted.bst +sources: +- kind: local + path: "a" +public: + bst: + overlap-whitelist: + - "%{undefined-variable}/*" |