summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbst-marge-bot <marge-bot@buildstream.build>2020-07-23 03:32:25 +0000
committerbst-marge-bot <marge-bot@buildstream.build>2020-07-23 03:32:25 +0000
commitf5d6bae43879c58cfb9b42cf047e794277e10fb6 (patch)
tree1be77bb7bce8ed846fc1cce3d96332ec86f357e7
parent64d257e6e34382f4e90b834a05da36f31e1167f9 (diff)
parent59e6e2a3c1973c5efe09035c925c884b226192b3 (diff)
downloadbuildstream-f5d6bae43879c58cfb9b42cf047e794277e10fb6.tar.gz
Merge branch 'tristan/partial-variables' into 'master'
Variables non-recursive and lazy resolution refactor See merge request BuildStream/buildstream!1973
-rw-r--r--src/buildstream/_variables.pyx776
-rw-r--r--src/buildstream/element.py6
-rw-r--r--src/buildstream/exceptions.py4
-rw-r--r--tests/format/variables.py99
-rw-r--r--tests/format/variables/cyclic_variables/cyclic.bst2
-rw-r--r--tests/format/variables/cyclic_variables/indirect-cyclic.bst8
-rw-r--r--tests/format/variables/cyclic_variables/self-reference.bst4
-rw-r--r--tests/format/variables/missing_variables/manual3.bst10
-rw-r--r--tests/format/variables/partial_context/base.bst4
-rw-r--r--tests/format/variables/partial_context/base/project.conf3
-rw-r--r--tests/format/variables/partial_context/base/vars.yml2
-rw-r--r--tests/format/variables/partial_context/project.conf8
-rw-r--r--tests/format/variables/partial_context/test.bst3
-rw-r--r--tests/frontend/overlaps.py15
-rw-r--r--tests/frontend/overlaps/whitelist-undefined.bst13
15 files changed, 755 insertions, 202 deletions
diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index e3fcfc3a0..78f4dcecb 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2016 Codethink Limited
+# Copyright (C) 2020 Codethink Limited
# Copyright (C) 2019 Bloomberg L.P.
#
# This program is free software; you can redistribute it and/or
@@ -22,42 +22,94 @@
import re
import sys
+import itertools
from ._exceptions import LoadError
from .exceptions import LoadErrorReason
-from .node cimport MappingNode, Node, ScalarNode, SequenceNode
+from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation
-# Variables are allowed to have dashes here
+########################################################
+# Understanding Value Expressions #
+########################################################
+#
+# This code uses the term "value expression" a lot to refer to `str` objects
+# which have references to variables in them, and also to `list` objects which
+# are effectively broken down strings.
+#
+# Ideally we would have a ValueExpression type in order to make this more
+# comprehensive, but this would unfortunately introduce unnecessary overhead,
+# making the code measurably slower.
+#
+# Value Expression Strings
+# ------------------------
+# Strings which contain variables in them, such as:
+#
+# "My name is %{username}, good day."
+#
+#
+# Parsed Value Expression Lists
+# -----------------------------
+# Using `re.split()` from python's regular expression implementation, we
+# parse the list using our locally defined VALUE_EXPRESSION_REGEX, which
+# breaks down the string into a list of "literal" and "variable" components.
+#
+# The "literal" components are literal portions of the string which need
+# no substitution, while the "variable" components represent variable names
+# which need to be substituted with their corresponding resolved values.
+#
+# The parsed variable expressions have the following properties:
+#
+# * They are sparse, some of the "literal" values contain zero length
+# strings which can be ignored.
+#
+# * Literal values are found only at even indices of the parsed
+# variable expression
+#
+# * Variable names are found only at odd indices
+#
+# The above example "My name is %{username}, good day." is broken down
+# into a parsed value expression as follows:
+#
+# [
+# "My name is ", # <- Index 0, literal value
+# "username", # <- Index 1, variable name, '%{ ... }' discarded
+# ", good day." # <- Index 2, literal value
+# ]
#
-PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
+# Maximum recursion depth using the fast (recursive) variable resolution
+# algorithm.
+#
+cdef Py_ssize_t MAX_RECURSION_DEPTH = 200
-# Throughout this code you will see variables named things like `expstr`.
-# These hold data structures called "expansion strings" and are the parsed
-# form of the strings which are the input to this subsystem. Strings
-# such as "Hello %{name}, how are you?" are parsed into the form:
-# ["Hello ", "name", ", how are you?"]
-# i.e. a list which consists of one or more strings.
-# Strings in even indices of the list (0, 2, 4, etc) are constants which
-# are copied into the output of the expansion algorithm. Strings in the
-# odd indices (1, 3, 5, etc) are the names of further expansions to make.
-# In the example above, first "Hello " is copied, then "name" is expanded
-# and so must be another named expansion string passed in to the constructor
-# of the Variables class, and whatever is yielded from the expansion of "name"
-# is added to the concatenation for the result. Finally ", how are you?" is
-# copied in and the whole lot concatenated for return.
+# Regular expression used to parse %{variables} in value expressions
#
-# To see how strings are parsed, see `_parse_expstr()` after the class, and
-# to see how expansion strings are expanded, see `_expand_expstr()` after that.
+# Note that variables are allowed to have dashes
+#
+VALUE_EXPRESSION_REGEX = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
+
+# Cache for the parsed expansion strings.
+#
+cdef dict VALUE_EXPRESSION_CACHE = {
+ # Prime the cache with the empty string since otherwise that can
+ # cause issues with the parser, complications to which cause slowdown
+ "": [""],
+}
-# The Variables helper object will resolve the variable references in
-# the given dictionary, expecting that any dictionary values which contain
-# variable references can be resolved from the same dictionary.
+# Variables()
+#
+# The Variables object resolves the variable references in the given MappingNode,
+# expecting that any dictionary values which contain variable references can be
+# resolved from the same dictionary.
#
# Each Element creates its own Variables instance to track the configured
# variable settings for the element.
#
+# Notably, this object is delegated the responsibility of expanding
+# variables in yaml Node hierarchies and substituting variables in strings
+# in the context of a given Element's variable configuration.
+#
# Args:
# node (Node): A node loaded and composited with yaml tools
#
@@ -66,20 +118,61 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
#
cdef class Variables:
- cdef MappingNode original
- cdef dict _expstr_map
+ cdef MappingNode _original
+ cdef dict _values
+ #################################################################
+ # Dunder Methods #
+ #################################################################
def __init__(self, MappingNode node):
- self.original = node
- self._expstr_map = self._resolve(node)
- self._check_for_missing()
- self._check_for_cycles()
+ # The original MappingNode, we need to keep this
+ # around for proper error reporting.
+ #
+ self._original = node
+
+ # The value map, this dictionary contains either unresolved
+ # value expressions, or resolved values.
+ #
+ # Each mapping value is a list, in the case that the value
+ # is resolved, then the list is only 1 element long.
+ #
+ self._values = self._init_values(node)
+
+ # __getitem__()
+ #
+ # Fetches a resolved variable by it's name, allows
+ # addressing the Variables instance like a dictionary.
+ #
+ # Args:
+ # name (str): The name of the variable
+ #
+ # Returns:
+ # (str): The resolved variable value
+ #
+ # Raises:
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
+ #
def __getitem__(self, str name):
- return _expand_var(self._expstr_map, name)
+ if name not in self._values:
+ raise KeyError(name)
+
+ return self._expand_var(name)
+ # __contains__()
+ #
+ # Checks whether a given variable exists, allows
+ # supporting `if 'foo' in variables` expressions.
+ #
+ # Args:
+ # name (str): The name of the variable to check for
+ #
+ # Returns:
+ # (bool): True if `name` is a valid variable
+ #
def __contains__(self, str name):
- return name in self._expstr_map
+ return name in self._values
# __iter__()
#
@@ -89,7 +182,28 @@ cdef class Variables:
# (Iterator[Tuple[str, str]])
#
def __iter__(self):
- return _VariablesIterator(self._expstr_map)
+ return _VariablesIterator(self)
+
+ #################################################################
+ # Public API #
+ #################################################################
+
+ # check()
+ #
+ # Assert that all variables declared on this Variables
+ # instance have been resolved properly, and reports errors
+ # for undefined references and circular references.
+ #
+ # Raises:
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
+ #
+ cpdef check(self):
+ cdef object key
+
+ # Just resolve all variables.
+ for key in self._values.keys():
+ self._expand_var(<str> key)
# get()
#
@@ -97,15 +211,15 @@ cdef class Variables:
# defined, it will return None instead of failing.
#
# Args:
- # name (str): Name of the variable to expand
+ # name (str): Name of the variable to expand
#
# Returns:
- # (str|None): The expanded value for the variable or None variable was not defined.
+ # (str|None): The expanded value for the variable or None variable was not defined.
#
cpdef str get(self, str name):
- if name not in self._expstr_map:
+ if name not in self._values:
return None
- return _expand_var(self._expstr_map, name)
+ return self[name]
# expand()
#
@@ -114,11 +228,15 @@ cdef class Variables:
# the node untouched, you should use `node.clone()` beforehand
#
# Args:
- # (Node): A node for which to substitute the values
+ # (Node): A node for which to substitute the values
+ #
+ # Raises:
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
#
cpdef expand(self, Node node):
if isinstance(node, ScalarNode):
- (<ScalarNode> node).value = self.subst((<ScalarNode> node).value)
+ (<ScalarNode> node).value = self.subst(<ScalarNode> node)
elif isinstance(node, SequenceNode):
for entry in (<SequenceNode> node).value:
self.expand(entry)
@@ -133,43 +251,40 @@ cdef class Variables:
# Substitutes any variables in 'string' and returns the result.
#
# Args:
- # (string): The string to substitute
+ # node (ScalarNode): The ScalarNode to substitute variables in
#
# Returns:
- # (string): The new string with any substitutions made
+ # (str): The new string with any substitutions made
#
# Raises:
- # LoadError, if the string contains unresolved variable references.
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
#
- cpdef subst(self, str string):
- expstr = _parse_expstr(string)
+ cpdef str subst(self, ScalarNode node):
+ value_expression = _parse_value_expression(node.as_str())
+ return self._expand_value_expression(value_expression, node)
- try:
- return _expand_expstr(self._expstr_map, expstr)
- except KeyError:
- unmatched = []
-
- # Look for any unmatched variable names in the expansion string
- for var in expstr[1::2]:
- if var not in self._expstr_map:
- unmatched.append(var)
-
- if unmatched:
- message = "Unresolved variable{}: {}".format(
- "s" if len(unmatched) > 1 else "",
- ", ".join(unmatched)
- )
-
- raise LoadError(message, LoadErrorReason.UNRESOLVED_VARIABLE)
- # Otherwise, re-raise the KeyError since it clearly came from some
- # other unknowable cause.
- raise
-
- # Variable resolving code
- #
- # Here we resolve all of our inputs into a dictionary, ready for use
- # in subst()
- cdef dict _resolve(self, MappingNode node):
+ #################################################################
+ # Private API #
+ #################################################################
+
+ # _init_values()
+ #
+ # Initialize the table of values.
+ #
+ # The value table is a dictionary keyed by the variable names where
+ # the values are value expressions (lists) which are initially unresolved.
+ #
+ # Value expressions are later resolved on demand and replaced in this
+ # table with single element lists.
+ #
+ # Args:
+ # node (MappingNode): The original variables mapping node
+ #
+ # Returns:
+ # (dict): A dictionary of value expressions (lists)
+ #
+ cdef dict _init_values(self, MappingNode node):
# Special case, if notparallel is specified in the variables for this
# element, then override max-jobs to be 1.
# Initialize it as a string as all variables are processed as strings.
@@ -178,158 +293,451 @@ cdef class Variables:
node['max-jobs'] = str(1)
cdef dict ret = {}
+ cdef object key_object
cdef str key
cdef str value
- for key in node.keys():
+ for key_object in node.keys():
+ key = <str> key_object
value = node.get_str(key)
- ret[sys.intern(key)] = _parse_expstr(value)
+ ret[sys.intern(key)] = _parse_value_expression(value)
+
return ret
- def _check_for_missing(self):
- # First the check for anything unresolvable
- summary = []
- for key, expstr in self._expstr_map.items():
- for var in expstr[1::2]:
- if var not in self._expstr_map:
- line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}"
- provenance = self.original.get_scalar(key).get_provenance()
- summary.append(line.format(unmatched=var, variable=key, provenance=provenance))
- if summary:
- raise LoadError("Failed to resolve one or more variable:\n{}\n".format("\n".join(summary)),
- LoadErrorReason.UNRESOLVED_VARIABLE)
-
- def _check_for_cycles(self):
- # And now the cycle checks
- def cycle_check(expstr, visited, cleared):
- for var in expstr[1::2]:
- if var in cleared:
+ # _expand_var()
+ #
+ # Expand and cache a variable definition.
+ #
+ # This will try the fast, recursive path first and fallback to
+ # the slower iterative codepath.
+ #
+ # Args:
+ # name (str): Name of the variable to expand
+ #
+ # Returns:
+ # (str): The expanded value of variable
+ #
+ # Raises:
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
+ #
+ cdef str _expand_var(self, str name):
+ try:
+ return self._fast_expand_var(name)
+ except (KeyError, RecursionError):
+ return self._slow_expand_var(name)
+
+ # _expand_value_expression()
+ #
+ # Expands a value expression
+ #
+ # This will try the fast, recursive path first and fallback to
+ # the slower iterative codepath.
+ #
+ # Args:
+ # value_expression (list): The parsed value expression to be expanded
+ # node (ScalarNode): The toplevel ScalarNode who is asking for an expansion
+ #
+ # Returns:
+ # (str): The expanded value expression
+ #
+ # Raises:
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
+ #
+ cdef str _expand_value_expression(self, list value_expression, ScalarNode node):
+ try:
+ return self._fast_expand_value_expression(value_expression)
+ except (KeyError, RecursionError):
+ return self._slow_expand_value_expression(None, value_expression, node)
+
+ #################################################################
+ # Resolution algorithm: fast path #
+ #################################################################
+
+ # _fast_expand_var()
+ #
+ # Fast, recursive path for variable expansion
+ #
+ # Args:
+ # name (str): Name of the variable to expand
+ # counter (int): Number of recursion cycles (used only in recursion)
+ #
+ # Returns:
+ # (str): The expanded value of variable
+ #
+ # Raises:
+ # (KeyError): If a reference to an undefined variable is encountered
+ # (RecursionError): If MAX_RECURSION_DEPTH recursion cycles is reached
+ #
+ cdef str _fast_expand_var(self, str name, int counter = 0):
+ cdef str sub
+ cdef list value_expression
+
+ value_expression = <list> self._values[name]
+ if len(value_expression) > 1:
+ sub = self._fast_expand_value_expression(value_expression, counter)
+ value_expression = [sys.intern(sub)]
+ self._values[name] = value_expression
+
+ return <str> value_expression[0]
+
+ # _fast_expand_value_expression()
+ #
+ # Fast, recursive path for value expression expansion.
+ #
+ # Args:
+ # value_expression (list): The parsed value expression to be expanded
+ # counter (int): Number of recursion cycles (used only in recursion)
+ #
+ # Returns:
+ # (str): The expanded value expression
+ #
+ # Raises:
+ # (KeyError): If a reference to an undefined variable is encountered
+ # (RecursionError): If MAX_RECURSION_DEPTH recursion cycles is reached
+ #
+ cdef str _fast_expand_value_expression(self, list value_expression, int counter = 0):
+ if counter > MAX_RECURSION_DEPTH:
+ raise RecursionError()
+
+ cdef Py_ssize_t idx
+ cdef object value
+ cdef list acc = []
+
+ for idx, value in enumerate(value_expression):
+ if (idx % 2) == 0:
+ acc.append(value)
+ else:
+ acc.append(self._fast_expand_var(<str> value, counter + 1))
+
+ return "".join(acc)
+
+ #################################################################
+ # Resolution algorithm: slow path #
+ #################################################################
+
+ # _slow_expand_var()
+ #
+ # Slow, iterative path for variable expansion with full error reporting
+ #
+ # Args:
+ # name (str): Name of the variable to expand
+ #
+ # Returns:
+ # (str): The expanded value of variable
+ #
+ # Raises:
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
+ #
+ cdef str _slow_expand_var(self, str name):
+ cdef list value_expression
+ cdef str expanded
+
+ value_expression = self._get_checked_value_expression(name, None, None)
+ if len(value_expression) > 1:
+ expanded = self._slow_expand_value_expression(name, value_expression, None)
+ value_expression = [sys.intern(expanded)]
+ self._values[name] = value_expression
+
+ return <str> value_expression[0]
+
+ # _slow_expand_value_expression()
+ #
+ # Slow, iterative path for value expression expansion with full error reporting
+ #
+ # Note that either `varname` or `node` must be provided, these are used to
+ # identify the provenance of this value expression (which might be the value
+ # of a variable, or a value expression found elswhere in project YAML which
+ # needs to be substituted).
+ #
+ # Args:
+ # varname (str|None): The variable name associated with this value expression, if any
+ # value_expression (list): The parsed value expression to be expanded
+ # node (ScalarNode|None): The ScalarNode who is asking for an expansion
+ #
+ # Returns:
+ # (str): The expanded value expression
+ #
+ # Raises:
+ # (LoadError): In the case of an undefined variable or
+ # a cyclic variable reference
+ #
+ cdef str _slow_expand_value_expression(self, str varname, list value_expression, ScalarNode node):
+ cdef ResolutionStep step
+ cdef ResolutionStep new_step
+ cdef ResolutionStep this_step
+ cdef list iter_value_expression
+ cdef Py_ssize_t idx = 0
+ cdef object value
+ cdef str resolved_varname
+ cdef str resolved_value = None
+
+ # We will collect the varnames and value expressions which need
+ # to be resolved in the loop, sorted by dependency, and then
+ # finally reverse through them resolving them one at a time
+ #
+ cdef list resolved_varnames = []
+ cdef list resolved_values = []
+
+ step = ResolutionStep()
+ step.init(varname, value_expression, None)
+ while step:
+ # Keep a hold of the current overall step
+ this_step = step
+ step = step.prev
+
+ # Check for circular dependencies
+ this_step.check_circular(self._original)
+
+ for idx, value in enumerate(this_step.value_expression):
+
+ # Skip literal parts of the value expression
+ if (idx % 2) == 0:
continue
- if var in visited:
- raise LoadError("{}: ".format(self.original.get_scalar(var).get_provenance()) +
- ("Variable '{}' expands to contain a reference to itself. " +
- "Perhaps '{}' contains '%{{{}}}").format(var, visited[-1], var),
- LoadErrorReason.RECURSIVE_VARIABLE)
- visited.append(var)
- cycle_check(self._expstr_map[var], visited, cleared)
- visited.pop()
- cleared.add(var)
-
- cleared = set()
- for key, expstr in self._expstr_map.items():
- if key not in cleared:
- cycle_check(expstr, [key], cleared)
-
-# Cache for the parsed expansion strings. While this is nominally
-# something which might "waste" memory, in reality each of these
-# will live as long as the element which uses it, which is the
-# vast majority of the memory usage across the execution of BuildStream.
-cdef dict PARSE_CACHE = {
- # Prime the cache with the empty string since otherwise that can
- # cause issues with the parser, complications to which cause slowdown
- "": [""],
-}
+ iter_value_expression = self._get_checked_value_expression(<str> value, this_step.referee, node)
+
+ # Queue up this value.
+ #
+ # Even if the value was already resolved, we need it in context to resolve
+ # previously enqueued variables
+ resolved_values.append(iter_value_expression)
+ resolved_varnames.append(value)
+
+ # Queue up the values dependencies.
+ #
+ if len(iter_value_expression) > 1:
+ new_step = ResolutionStep()
+ new_step.init(<str> value, iter_value_expression, this_step)
+
+ # Link it to the end of the stack
+ new_step.prev = step
+ step = new_step
+
+ # We've now constructed the dependencies queue such that
+ # later dependencies are on the right, we can now safely peddle
+ # backwards and the last (leftmost) resolved value is the one
+ # we want to return.
+ #
+ for iter_value_expression, resolved_varname in zip(reversed(resolved_values), reversed(resolved_varnames)):
-# Helper to parse a string into an expansion string tuple, caching
-# the results so that future parse requests don't need to think about
-# the string
-cdef list _parse_expstr(str instr):
- cdef list ret
+ # Resolve variable expressions as needed
+ #
+ if len(iter_value_expression) > 1:
+ resolved_value = self._resolve_value_expression(iter_value_expression)
+ iter_value_expression = [resolved_value]
+ if resolved_varname is not None:
+ self._values[resolved_varname] = iter_value_expression
- try:
- return <list> PARSE_CACHE[instr]
- except KeyError:
- # This use of the regex turns a string like "foo %{bar} baz" into
- # a list ["foo ", "bar", " baz"]
- splits = PARSE_EXPANSION.split(instr)
- # If an expansion ends the string, we get an empty string on the end
- # which we can optimise away, making the expansion routines not need
- # a test for this.
- if splits[-1] == '':
- del splits [-1]
- # Cache an interned copy of this. We intern it to try and reduce the
- # memory impact of the cache. It seems odd to cache the list length
- # but this is measurably cheaper than calculating it each time during
- # string expansion.
- ret = [sys.intern(<str> s) for s in <list> splits]
- PARSE_CACHE[instr] = ret
- return ret
+ return resolved_value
+ # _get_checked_value_expression()
+ #
+ # Fetches a value expression from the value table and raises a user
+ # facing error if the value is undefined.
+ #
+ # Args:
+ # varname (str): The variable name to fetch
+ # referee (str): The variable name referring to `varname`, or None
+ # node (ScalarNode): The ScalarNode for which we need to resolve `name`
+ #
+ # Returns:
+ # (list): The value expression for varname
+ #
+ # Raises:
+ # (LoadError): An appropriate error in case of undefined variables
+ #
+ cdef list _get_checked_value_expression(self, str varname, str referee, ScalarNode node):
+ cdef ProvenanceInformation provenance = None
+ cdef Node referee_node
+ cdef str error_message
-# Helper to expand and cache a variable definition in the context of
-# the given dictionary of expansion strings.
-#
-# Args:
-# content (dict): Dictionary of expansion strings
-# name (str): Name of the variable to expand
-# counter (int): Recursion counter
-#
-# Returns:
-# (str): The expanded value of variable
+ #
+ # Fetch the value and detect undefined references
+ #
+ try:
+ return <list> self._values[varname]
+ except KeyError as e:
+
+ # Either the provenance is the toplevel calling provenance,
+ # or it is the provenance of the direct referee
+ referee_node = self._original.get_node(referee, allowed_types=None, allow_none=True)
+ if referee_node is not None:
+ provenance = referee_node.get_provenance()
+ elif node:
+ provenance = node.get_provenance()
+
+ error_message = "Reference to undefined variable '{}'".format(varname)
+ if provenance:
+ error_message = "{}: {}".format(provenance, error_message)
+ raise LoadError(error_message, LoadErrorReason.UNRESOLVED_VARIABLE) from e
+
+ # _resolve_value_expression()
+ #
+ # Resolves a value expression with the expectation that all
+ # variables within this value expression have already been
+ # resolved and updated in the Variables._values table.
+ #
+ # This is used as a part of the iterative resolution codepath,
+ # where value expressions are first sorted by dependency before
+ # being resolved in one go.
+ #
+ # Args:
+ # value_expression (list): The value expression to resolve
+ #
+ # Returns:
+ # (str): The resolved value expression
+ #
+ cdef str _resolve_value_expression(self, list value_expression):
+ cdef Py_ssize_t idx
+ cdef object value
+ cdef list acc = []
+
+ for idx, value in enumerate(value_expression):
+ if (idx % 2) == 0:
+ acc.append(value)
+ else:
+ acc.append(self._values[value][0])
+
+ return "".join(acc)
+
+
+# ResolutionStep()
#
-# Raises:
-# KeyError, if any expansion is missing
-# RecursionError, if recursion required for evaluation is too deep
+# The context for a single iteration in variable resolution.
#
-cdef str _expand_var(dict content, str name, int counter = 0):
- cdef str sub
+cdef class ResolutionStep:
+ cdef str referee
+ cdef list value_expression
+ cdef ResolutionStep parent
+ cdef ResolutionStep prev
+
+ # init()
+ #
+ # Initialize this ResolutionStep
+ #
+ # Args:
+ # referee (str): The name of the referring variable
+ # value_expression (list): The parsed value expression to be expanded
+ # parent (ResolutionStep): The parent ResolutionStep
+ #
+ cdef init(self, str referee, list value_expression, ResolutionStep parent):
+ self.referee = referee
+ self.value_expression = value_expression
+ self.parent = parent
+ self.prev = None
+
+ # check_circular()
+ #
+ # Check for circular references in this step.
+ #
+ # Args:
+ # original_values (MappingNode): The original MappingNode for the Variables
+ #
+ # Raises:
+ # (LoadError): Will raise a user facing LoadError with
+ # LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case
+ # circular references were encountered.
+ #
+ cdef check_circular(self, MappingNode original_values):
+ cdef ResolutionStep step = self.parent
+ while step:
+ if self.referee is step.referee:
+ self._raise_circular_reference_error(step, original_values)
+ step = step.parent
+
+ # _raise_circular_reference_error()
+ #
+ # Helper function to construct a full report and raise the LoadError
+ # with LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE.
+ #
+ # Args:
+ # conflict (ResolutionStep): The resolution step which conflicts with this step
+ # original_values (MappingNode): The original node to extract provenances from
+ #
+ # Raises:
+ # (LoadError): Unconditionally
+ #
+ cdef _raise_circular_reference_error(self, ResolutionStep conflict, MappingNode original_values):
+ cdef list error_lines = []
+ cdef ResolutionStep step = self
+ cdef ScalarNode node
+ cdef str referee
+
+ while step is not conflict:
+ if step.parent:
+ referee = step.parent.referee
+ else:
+ referee = self.referee
+
+ node = original_values.get_scalar(referee)
- if len(content[name]) > 1:
- sub = _expand_expstr(content, <list> content[name], counter)
- content[name] = [sys.intern(sub)]
+ error_lines.append("{}: Variable '{}' refers to variable '{}'".format(node.get_provenance(), referee, step.referee))
+ step = step.parent
- return content[name][0]
+ raise LoadError("Circular dependency detected on variable '{}'".format(self.referee),
+ LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE,
+ detail="\n".join(reversed(error_lines)))
-# Helper to expand a given top level expansion string tuple in the context
-# of the given dictionary of expansion strings.
+# _parse_value_expression()
+#
+# Tries to fetch the parsed value expression from the cache, parsing and
+# caching value expressions on demand and returns the parsed value expression.
#
# Args:
-# content (dict): Dictionary of expansion strings
-# name (str): Name of the variable to expand
-# counter (int): Recursion counter
+# value_expression (str): The value expression in string form to parse
#
# Returns:
-# (str): The expanded value of variable
-#
-# Raises:
-# KeyError, if any expansion is missing
-# RecursionError, if recursion required for evaluation is too deep
+# (list): The parsed value expression in list form.
#
-cdef str _expand_expstr(dict content, list value, int counter = 0):
- if counter > 1000:
- raise RecursionError()
+cdef list _parse_value_expression(str value_expression):
+ cdef list ret
- cdef Py_ssize_t idx = 0
- cdef Py_ssize_t value_len = len(value)
- cdef str sub
- cdef list acc = []
+ try:
+ return <list> VALUE_EXPRESSION_CACHE[value_expression]
+ except KeyError:
+ # This use of the regex turns a string like "foo %{bar} baz" into
+ # a list ["foo ", "bar", " baz"]
+ #
+ # The result is a parsed value expression, where even indicies
+ # contain literal parts of the value and odd indices contain
+ # variable names which need to be replaced by resolved variables.
+ #
+ splits = VALUE_EXPRESSION_REGEX.split(value_expression)
- while idx < value_len:
- acc.append(value[idx])
- idx += 1
+ # Optimize later routines by discarding any unnecessary trailing
+ # empty strings.
+ #
+ if splits[-1] == '':
+ del splits[-1]
- if idx < value_len:
- acc.append(_expand_var(content, <str> value[idx], counter + 1))
- idx += 1
+ # We intern the string parts to try and reduce the memory impact
+ # of the cache.
+ #
+ ret = [sys.intern(<str> s) for s in <list> splits]
- return "".join(acc)
+ # Cache and return the value expression
+ #
+ VALUE_EXPRESSION_CACHE[value_expression] = ret
+ return ret
# Iterator for all flatten variables.
# Used by Variables.__iter__
cdef class _VariablesIterator:
- cdef dict _expstr_map
+ cdef Variables _variables
cdef object _iter
- def __init__(self, dict expstr_map):
- self._expstr_map = expstr_map
- self._iter = iter(expstr_map)
+ def __init__(self, Variables variables):
+ self._variables = variables
+ self._iter = iter(variables._values)
def __iter__(self):
return self
def __next__(self):
name = next(self._iter)
- return name, _expand_var(self._expstr_map, name)
+ return name, self._variables._expand_var(name)
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 45c3735fd..547397e54 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -285,6 +285,8 @@ class Element(Plugin):
variables = self.__extract_variables(project, meta)
variables["element-name"] = self.name
self.__variables = Variables(variables)
+ if not meta.is_junction:
+ self.__variables.check()
# Collect the composited environment now that we have variables
unexpanded_env = self.__extract_environment(project, meta)
@@ -2815,8 +2817,8 @@ class Element(Plugin):
# If this ever changes, things will go wrong unexpectedly.
if not self.__whitelist_regex:
bstdata = self.get_public_data("bst")
- whitelist = bstdata.get_str_list("overlap-whitelist", default=[])
- whitelist_expressions = [utils._glob2re(self.__variables.subst(exp.strip())) for exp in whitelist]
+ whitelist = bstdata.get_sequence("overlap-whitelist", default=[])
+ whitelist_expressions = [utils._glob2re(self.__variables.subst(node)) for node in whitelist]
expression = "^(?:" + "|".join(whitelist_expressions) + ")$"
self.__whitelist_regex = re.compile(expression)
return self.__whitelist_regex.match(os.path.join(os.sep, path))
diff --git a/src/buildstream/exceptions.py b/src/buildstream/exceptions.py
index e77d64fe7..caf08ae57 100644
--- a/src/buildstream/exceptions.py
+++ b/src/buildstream/exceptions.py
@@ -131,8 +131,8 @@ class LoadErrorReason(Enum):
RECURSIVE_INCLUDE = 21
"""A recursive include has been encountered"""
- RECURSIVE_VARIABLE = 22
- """A recursive variable has been encountered"""
+ CIRCULAR_REFERENCE_VARIABLE = 22
+ """A circular variable reference was detected"""
PROTECTED_VARIABLE_REDEFINED = 23
"""An attempt was made to set the value of a protected variable"""
diff --git a/tests/format/variables.py b/tests/format/variables.py
index c5e8eebad..2f436fc75 100644
--- a/tests/format/variables.py
+++ b/tests/format/variables.py
@@ -53,30 +53,95 @@ def test_overrides(cli, datafiles, target, varname, expected):
assert result_vars.get_str(varname) == expected
-@pytest.mark.parametrize("element", ["manual.bst", "manual2.bst"])
+@pytest.mark.parametrize(
+ "element,provenance",
+ [
+ # This test makes a reference to an undefined variable in a build command
+ ("manual.bst", "manual.bst [line 5 column 6]"),
+ # This test makes a reference to an undefined variable by another variable,
+ # ensuring that we validate variables even when they are unused
+ ("manual2.bst", "manual2.bst [line 4 column 8]"),
+ # This test uses a build command to refer to some variables which ultimately
+ # refer to an undefined variable, testing a more complex case.
+ ("manual3.bst", "manual3.bst [line 6 column 8]"),
+ ],
+ ids=["build-command", "variables", "complex"],
+)
@pytest.mark.datafiles(os.path.join(DATA_DIR, "missing_variables"))
-def test_missing_variable(cli, datafiles, element):
+def test_undefined(cli, datafiles, element, provenance):
project = str(datafiles)
result = cli.run(project=project, silent=True, args=["show", "--deps", "none", "--format", "%{config}", element])
result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+ assert provenance in result.stderr
+@pytest.mark.parametrize(
+ "element,provenances",
+ [
+ # Test a simple a -> b and b -> a reference
+ ("simple-cyclic.bst", ["simple-cyclic.bst [line 4 column 5]", "simple-cyclic.bst [line 5 column 5]"]),
+ # Test a simple a -> b and b -> a reference with some text involved
+ ("cyclic.bst", ["cyclic.bst [line 5 column 10]", "cyclic.bst [line 4 column 5]"]),
+ # Test an indirect circular dependency
+ (
+ "indirect-cyclic.bst",
+ [
+ "indirect-cyclic.bst [line 5 column 5]",
+ "indirect-cyclic.bst [line 6 column 5]",
+ "indirect-cyclic.bst [line 7 column 5]",
+ "indirect-cyclic.bst [line 8 column 5]",
+ ],
+ ),
+ # Test an indirect circular dependency
+ ("self-reference.bst", ["self-reference.bst [line 4 column 5]"]),
+ ],
+ ids=["simple", "simple-text", "indirect", "self-reference"],
+)
@pytest.mark.timeout(15, method="signal")
@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
-def test_simple_cyclic_variables(cli, datafiles):
- print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
+def test_circular_reference(cli, datafiles, element, provenances):
+ print_warning("Performing cyclic test, if this test times out it will exit the test sequence")
project = str(datafiles)
- result = cli.run(project=project, silent=True, args=["build", "simple-cyclic.bst"])
- result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE)
+ result = cli.run(project=project, silent=True, args=["build", element])
+ result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE)
+ for provenance in provenances:
+ assert provenance in result.stderr
-@pytest.mark.timeout(15, method="signal")
-@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
-def test_cyclic_variables(cli, datafiles):
- print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
+# Test that variables which refer to eachother very deeply are
+# still resolved correctly, this ensures that we are not relying
+# on a recursive algorithm limited by stack depth.
+#
+@pytest.mark.parametrize(
+ "maxvars", [50, 500, 5000],
+)
+@pytest.mark.datafiles(os.path.join(DATA_DIR, "defaults"))
+def test_deep_references(cli, datafiles, maxvars):
project = str(datafiles)
- result = cli.run(project=project, silent=True, args=["build", "cyclic.bst"])
- result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE)
+
+ # Generate an element with very, very many variables to resolve,
+ # each which expand to the value of the previous variable.
+ #
+ # The bottom variable defines a test value which we check for
+ # in the top variable in `bst show` output.
+ #
+ topvar = "var{}".format(maxvars)
+ bottomvar = "var0"
+ testvalue = "testvalue {}".format(maxvars)
+
+ # Generate
+ variables = {"var{}".format(idx + 1): "%{var" + str(idx) + "}" for idx in range(maxvars)}
+ variables[bottomvar] = testvalue
+ element = {"kind": "manual", "variables": variables}
+ _yaml.roundtrip_dump(element, os.path.join(project, "test.bst"))
+
+ # Run `bst show`
+ result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"])
+ result.assert_success()
+
+ # Test results
+ result_vars = _yaml.load_data(result.output)
+ assert result_vars.get_str(topvar) == testvalue
@pytest.mark.parametrize("protected_var", PROTECTED_VARIABLES)
@@ -168,3 +233,13 @@ def test_variables_resolving_errors_in_public_section(cli, datafiles):
result = cli.run(project=project, args=["show", "--format", "%{public}", "public_unresolved.bst"])
result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+
+
+@pytest.mark.datafiles(os.path.join(DATA_DIR, "partial_context"))
+def test_partial_context_junctions(cli, datafiles):
+ project = str(datafiles)
+
+ result = cli.run(project=project, args=["show", "--format", "%{vars}", "test.bst"])
+ result.assert_success()
+ result_vars = _yaml.load_data(result.output)
+ assert result_vars.get_str("eltvar") == "/bar/foo/baz"
diff --git a/tests/format/variables/cyclic_variables/cyclic.bst b/tests/format/variables/cyclic_variables/cyclic.bst
index a05a40b27..38832fa86 100644
--- a/tests/format/variables/cyclic_variables/cyclic.bst
+++ b/tests/format/variables/cyclic_variables/cyclic.bst
@@ -2,4 +2,4 @@ kind: manual
variables:
a: "%{prefix}/a"
- prefix: "%{a}/some_prefix/" \ No newline at end of file
+ prefix: "%{a}/some_prefix/"
diff --git a/tests/format/variables/cyclic_variables/indirect-cyclic.bst b/tests/format/variables/cyclic_variables/indirect-cyclic.bst
new file mode 100644
index 000000000..fb06fb008
--- /dev/null
+++ b/tests/format/variables/cyclic_variables/indirect-cyclic.bst
@@ -0,0 +1,8 @@
+kind: manual
+
+variables:
+ foo: "%{a}"
+ a: "%{b}"
+ b: "%{c}"
+ c: "%{d}"
+ d: "%{a}"
diff --git a/tests/format/variables/cyclic_variables/self-reference.bst b/tests/format/variables/cyclic_variables/self-reference.bst
new file mode 100644
index 000000000..2e9829d03
--- /dev/null
+++ b/tests/format/variables/cyclic_variables/self-reference.bst
@@ -0,0 +1,4 @@
+kind: manual
+
+variables:
+ a: "Referencing itself with %{a}"
diff --git a/tests/format/variables/missing_variables/manual3.bst b/tests/format/variables/missing_variables/manual3.bst
new file mode 100644
index 000000000..496864890
--- /dev/null
+++ b/tests/format/variables/missing_variables/manual3.bst
@@ -0,0 +1,10 @@
+kind: manual
+
+variables:
+ hello: "Hello mister %{pony}"
+ greeting: "The %{hello} string twice: %{hello} again"
+ pony: "The pony is %{undefined}"
+
+config:
+ build-commands:
+ - Some indirectly undefined variable %{greeting}
diff --git a/tests/format/variables/partial_context/base.bst b/tests/format/variables/partial_context/base.bst
new file mode 100644
index 000000000..10ce559a9
--- /dev/null
+++ b/tests/format/variables/partial_context/base.bst
@@ -0,0 +1,4 @@
+kind: junction
+sources:
+- kind: local
+ path: base
diff --git a/tests/format/variables/partial_context/base/project.conf b/tests/format/variables/partial_context/base/project.conf
new file mode 100644
index 000000000..91511bfe9
--- /dev/null
+++ b/tests/format/variables/partial_context/base/project.conf
@@ -0,0 +1,3 @@
+name: base
+min-version: 2.0
+
diff --git a/tests/format/variables/partial_context/base/vars.yml b/tests/format/variables/partial_context/base/vars.yml
new file mode 100644
index 000000000..3a91e1310
--- /dev/null
+++ b/tests/format/variables/partial_context/base/vars.yml
@@ -0,0 +1,2 @@
+variables:
+ subvar: "/bar"
diff --git a/tests/format/variables/partial_context/project.conf b/tests/format/variables/partial_context/project.conf
new file mode 100644
index 000000000..e8a8ed038
--- /dev/null
+++ b/tests/format/variables/partial_context/project.conf
@@ -0,0 +1,8 @@
+name: test
+min-version: 2.0
+
+(@): base.bst:vars.yml
+
+variables:
+ var: "%{subvar}/foo"
+
diff --git a/tests/format/variables/partial_context/test.bst b/tests/format/variables/partial_context/test.bst
new file mode 100644
index 000000000..8276763c3
--- /dev/null
+++ b/tests/format/variables/partial_context/test.bst
@@ -0,0 +1,3 @@
+kind: manual
+variables:
+ eltvar: "%{var}/baz"
diff --git a/tests/frontend/overlaps.py b/tests/frontend/overlaps.py
index a45fc700d..28bf8a7f5 100644
--- a/tests/frontend/overlaps.py
+++ b/tests/frontend/overlaps.py
@@ -4,7 +4,7 @@
import os
import pytest
from buildstream.testing.runcli import cli # pylint: disable=unused-import
-from buildstream.exceptions import ErrorDomain
+from buildstream.exceptions import ErrorDomain, LoadErrorReason
from buildstream import _yaml
from buildstream.plugin import CoreWarnings
from tests.testutils import generate_junction
@@ -71,6 +71,19 @@ def test_overlaps_whitelist_on_overlapper(cli, datafiles):
@pytest.mark.datafiles(DATA_DIR)
+def test_overlaps_whitelist_undefined_variable(cli, datafiles):
+ project_dir = str(datafiles)
+ gen_project(project_dir, False)
+ result = cli.run(project=project_dir, silent=True, args=["build", "whitelist-undefined.bst"])
+
+ # Assert that we get the expected undefined variable error,
+ # and that it has the provenance we expect from whitelist-undefined.bst
+ #
+ result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.UNRESOLVED_VARIABLE)
+ assert "whitelist-undefined.bst [line 13 column 6]" in result.stderr
+
+
+@pytest.mark.datafiles(DATA_DIR)
@pytest.mark.parametrize("use_fatal_warnings", [True, False])
def test_overlaps_script(cli, datafiles, use_fatal_warnings):
# Test overlaps with script element to test
diff --git a/tests/frontend/overlaps/whitelist-undefined.bst b/tests/frontend/overlaps/whitelist-undefined.bst
new file mode 100644
index 000000000..5cb0c645e
--- /dev/null
+++ b/tests/frontend/overlaps/whitelist-undefined.bst
@@ -0,0 +1,13 @@
+kind: import
+config:
+ source: /
+ target: /
+depends:
+- b-whitelisted.bst
+sources:
+- kind: local
+ path: "a"
+public:
+ bst:
+ overlap-whitelist:
+ - "%{undefined-variable}/*"