summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValentin David <valentin.david@codethink.co.uk>2020-05-28 15:34:15 +0200
committerValentin David <valentin.david@codethink.co.uk>2020-06-03 10:35:18 +0000
commit8ae5e115a4693ff33248278f5702614ff6f3abee (patch)
tree42fb0714c0ecdffd800d1a9b35b77c1cc9c7bad2
parent76c51c2d1f00f171c1cf6e517cc4d7e43c740d28 (diff)
downloadbuildstream-8ae5e115a4693ff33248278f5702614ff6f3abee.tar.gz
Optimize variable flattening
Verifying for variables (all used variables defined and no cycle) is much faster than flattening them. It also uses much less memory. Only `bst show -f "%{vars}"` requires to flatten all variables. For other cases we can improve performance by only checking rather than flattening. Also flattening very nested variables could lead to very long lists of empty strings. The memory usage for flattening could be a lot bigger than the resulting value. Force flattening and caching every variable definitions can improve intermediate memory consumption and only consume memory of the size of the result. Finally, an optimization allowed inifinite cycles leading to segmentation fault. This was solved by simplifying the code.
-rw-r--r--src/buildstream/_frontend/widget.py2
-rw-r--r--src/buildstream/_variables.pyx156
-rw-r--r--src/buildstream/element.py2
-rw-r--r--tests/format/variables.py9
-rw-r--r--tests/format/variables/cyclic_variables/simple-cyclic.bst5
5 files changed, 109 insertions, 65 deletions
diff --git a/src/buildstream/_frontend/widget.py b/src/buildstream/_frontend/widget.py
index 81ca2f7b5..f07e3dba0 100644
--- a/src/buildstream/_frontend/widget.py
+++ b/src/buildstream/_frontend/widget.py
@@ -372,7 +372,7 @@ class LogLine(Widget):
# Variables
if "%{vars" in format_:
- variables = element._Element__variables.flat
+ variables = dict(element._Element__variables)
line = p.fmt_subst(
line, "vars", yaml.round_trip_dump(variables, default_flow_style=False, allow_unicode=True)
)
diff --git a/src/buildstream/_variables.pyx b/src/buildstream/_variables.pyx
index 9dc6cf623..e3fcfc3a0 100644
--- a/src/buildstream/_variables.pyx
+++ b/src/buildstream/_variables.pyx
@@ -68,12 +68,44 @@ cdef class Variables:
cdef MappingNode original
cdef dict _expstr_map
- cdef public dict flat
def __init__(self, MappingNode node):
self.original = node
self._expstr_map = self._resolve(node)
- self.flat = self._flatten()
+ self._check_for_missing()
+ self._check_for_cycles()
+
+ def __getitem__(self, str name):
+ return _expand_var(self._expstr_map, name)
+
+ def __contains__(self, str name):
+ return name in self._expstr_map
+
+ # __iter__()
+ #
+ # Provide an iterator for all variables effective values
+ #
+ # Returns:
+ # (Iterator[Tuple[str, str]])
+ #
+ def __iter__(self):
+ return _VariablesIterator(self._expstr_map)
+
+ # get()
+ #
+ # Expand definition of variable by name. If the variable is not
+ # defined, it will return None instead of failing.
+ #
+ # Args:
+ # name (str): Name of the variable to expand
+ #
+ # Returns:
+ # (str|None): The expanded value for the variable or None variable was not defined.
+ #
+ cpdef str get(self, str name):
+ if name not in self._expstr_map:
+ return None
+ return _expand_var(self._expstr_map, name)
# expand()
#
@@ -188,36 +220,6 @@ cdef class Variables:
if key not in cleared:
cycle_check(expstr, [key], cleared)
- # _flatten():
- #
- # Turn our dictionary of expansion strings into a flattened dict
- # so that we can run expansions faster in the future
- #
- # Raises:
- # LoadError, if the string contains unresolved variable references or
- # if cycles are detected in the variable references
- #
- cdef dict _flatten(self):
- cdef dict flat = {}
- cdef str key
- cdef list expstr
-
- try:
- for key, expstr in self._expstr_map.items():
- if len(expstr) > 1:
- # FIXME: do we really gain anything by interning?
- expstr = [sys.intern(_expand_expstr(self._expstr_map, expstr))]
- self._expstr_map[key] = expstr
- flat[key] = expstr[0]
- except KeyError:
- self._check_for_missing()
- raise
- except RecursionError:
- self._check_for_cycles()
- raise
- return flat
-
-
# Cache for the parsed expansion strings. While this is nominally
# something which might "waste" memory, in reality each of these
# will live as long as the element which uses it, which is the
@@ -255,51 +257,79 @@ cdef list _parse_expstr(str instr):
return ret
-# Helper to expand recursively an expansion string in the context
-# of the given dictionary of expansion string
+# Helper to expand and cache a variable definition in the context of
+# the given dictionary of expansion strings.
#
# Args:
-# content (dict): dictionary context for resolving the variables
-# value (list): expansion string to expand
-# acc (list): list in which to add the result
-# counter (int): counter to count the number of recursion in order to
-# detect cycles.
+# content (dict): Dictionary of expansion strings
+# name (str): Name of the variable to expand
+# counter (int): Recursion counter
+#
+# Returns:
+# (str): The expanded value of variable
#
# Raises:
-# KeyError: if any expansion is missing
-# RecursionError: if a variable is defined recursively
+# KeyError, if any expansion is missing
+# RecursionError, if recursion required for evaluation is too deep
#
-cdef void _expand_expstr_helper(dict content, list value, list acc, int counter = 0) except *:
- cdef Py_ssize_t idx = 0
- cdef Py_ssize_t value_len = len(value)
+cdef str _expand_var(dict content, str name, int counter = 0):
+ cdef str sub
+
+ if len(content[name]) > 1:
+ sub = _expand_expstr(content, <list> content[name], counter)
+ content[name] = [sys.intern(sub)]
+ return content[name][0]
+
+
+# Helper to expand a given top level expansion string tuple in the context
+# of the given dictionary of expansion strings.
+#
+# Args:
+# content (dict): Dictionary of expansion strings
+# name (str): Name of the variable to expand
+# counter (int): Recursion counter
+#
+# Returns:
+# (str): The expanded value of variable
+#
+# Raises:
+# KeyError, if any expansion is missing
+# RecursionError, if recursion required for evaluation is too deep
+#
+cdef str _expand_expstr(dict content, list value, int counter = 0):
if counter > 1000:
raise RecursionError()
+ cdef Py_ssize_t idx = 0
+ cdef Py_ssize_t value_len = len(value)
+ cdef str sub
+ cdef list acc = []
+
while idx < value_len:
acc.append(value[idx])
idx += 1
if idx < value_len:
- _expand_expstr_helper(content, <list> content[value[idx]], acc, counter + 1)
-
+ acc.append(_expand_var(content, <str> value[idx], counter + 1))
idx += 1
+ return "".join(acc)
-# Helper to expand a given top level expansion string tuple in the context
-# of the given dictionary of expansion strings.
-#
-# Note: Will raise KeyError if any expansion is missing
-cdef str _expand_expstr(dict content, list topvalue):
- # Short-circuit constant strings
- if len(topvalue) == 1:
- return <str> topvalue[0]
-
- # Short-circuit strings which are entirely an expansion of another variable
- # e.g. "%{another}"
- if len(topvalue) == 2 and len(<str> topvalue[0]) == 0:
- return _expand_expstr(content, <list> content[topvalue[1]])
-
- cdef list result = []
- _expand_expstr_helper(content, topvalue, result)
- return "".join(result)
+
+# Iterator for all flatten variables.
+# Used by Variables.__iter__
+cdef class _VariablesIterator:
+ cdef dict _expstr_map
+ cdef object _iter
+
+ def __init__(self, dict expstr_map):
+ self._expstr_map = expstr_map
+ self._iter = iter(expstr_map)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ name = next(self._iter)
+ return name, _expand_var(self._expstr_map, name)
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 09b0e4c06..917ad9ba9 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -852,7 +852,7 @@ class Element(Plugin):
variable was declared with the given name.
"""
# Flat is not recognized correctly by Pylint as being a dictionary
- return self.__variables.flat.get(varname) # pylint: disable=no-member
+ return self.__variables.get(varname) # pylint: disable=no-member
def batch_prepare_assemble(self, flags: int, *, collect: Optional[str] = None) -> None:
""" Configure command batching across prepare() and assemble()
diff --git a/tests/format/variables.py b/tests/format/variables.py
index 616dc20c1..c5e8eebad 100644
--- a/tests/format/variables.py
+++ b/tests/format/variables.py
@@ -63,6 +63,15 @@ def test_missing_variable(cli, datafiles, element):
@pytest.mark.timeout(15, method="signal")
@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
+def test_simple_cyclic_variables(cli, datafiles):
+ print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
+ project = str(datafiles)
+ result = cli.run(project=project, silent=True, args=["build", "simple-cyclic.bst"])
+ result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.RECURSIVE_VARIABLE)
+
+
+@pytest.mark.timeout(15, method="signal")
+@pytest.mark.datafiles(os.path.join(DATA_DIR, "cyclic_variables"))
def test_cyclic_variables(cli, datafiles):
print_warning("Performing cyclic test, if this test times out it will " + "exit the test sequence")
project = str(datafiles)
diff --git a/tests/format/variables/cyclic_variables/simple-cyclic.bst b/tests/format/variables/cyclic_variables/simple-cyclic.bst
new file mode 100644
index 000000000..806e1f390
--- /dev/null
+++ b/tests/format/variables/cyclic_variables/simple-cyclic.bst
@@ -0,0 +1,5 @@
+kind: manual
+
+variables:
+ a: "%{b}"
+ b: "%{a}"