diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2019-05-30 09:34:49 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2019-05-30 09:34:49 +0000 |
commit | 8b302c55d2da4f7ab29e346496a8e2fe3d7a7660 (patch) | |
tree | 8e46707ab3af4ecca682bfa754ea4d5233921690 | |
parent | 4b450f664b1bdd0f342f36453f8e46edbd0c887a (diff) | |
parent | 32ee4163bc6c628dc1d475a110022b37bf661fa5 (diff) | |
download | buildstream-8b302c55d2da4f7ab29e346496a8e2fe3d7a7660.tar.gz |
Merge branch 'bschubert/cython' into 'master'
Introduce Cython for better performances
See merge request BuildStream/buildstream!1350
-rw-r--r-- | .coveragerc | 3 | ||||
-rw-r--r-- | .gitignore | 10 | ||||
-rw-r--r-- | .pylintrc | 2 | ||||
-rw-r--r-- | CONTRIBUTING.rst | 41 | ||||
-rw-r--r-- | MANIFEST.in | 8 | ||||
-rw-r--r-- | pyproject.toml | 9 | ||||
-rw-r--r-- | requirements/cov-requirements.in | 1 | ||||
-rw-r--r-- | requirements/cov-requirements.txt | 1 | ||||
-rwxr-xr-x | setup.py | 110 | ||||
-rw-r--r-- | src/buildstream/_variables.pyx (renamed from src/buildstream/_variables.py) | 130 | ||||
-rw-r--r-- | src/buildstream/_yaml.pxd | 44 | ||||
-rw-r--r-- | src/buildstream/_yaml.pyx (renamed from src/buildstream/_yaml.py) | 686 | ||||
-rw-r--r-- | src/buildstream/element.py | 3 | ||||
-rw-r--r-- | tests/internals/yaml.py | 14 | ||||
-rw-r--r-- | tox.ini | 20 |
15 files changed, 725 insertions, 357 deletions
diff --git a/.coveragerc b/.coveragerc index 457c439a6..5b06d817a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,5 +1,6 @@ [run] concurrency = multiprocessing +plugins = Cython.Coverage omit = # Omit some internals @@ -11,6 +12,8 @@ omit = */.eggs/* # Omit .tox directory */.tox/* + # Omit a dynamically generated Cython file + */stringsource [report] show_missing = True diff --git a/.gitignore b/.gitignore index 3bc4e29af..a61a975ac 100644 --- a/.gitignore +++ b/.gitignore @@ -2,12 +2,22 @@ src/buildstream/**/*.pyc tests/**/*.pyc +# Generated C files +src/buildstream/**/*.c +src/buildstream/**/*.so + +# Cython report files when using annotations +src/buildstream/**/*.html + # Build output directory build # Setuptools distribution folder. /dist/ +# Pip build metadata +pip-wheel-metadata/ + # Python egg metadata, regenerated from source files by setuptools. /src/*.egg-info .eggs @@ -3,7 +3,7 @@ # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code -extension-pkg-whitelist= +extension-pkg-whitelist=buildstream._variables,buildstream._yaml # Add files or directories to the blacklist. They should be base names, not # paths. diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 921cee909..b128c7f9b 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1598,6 +1598,15 @@ can run ``tox`` with ``-r`` or ``--recreate`` option. ./setup.py test --addopts 'tests/frontend/buildtrack.py::test_build_track' + If you want to run coverage, you will need need to add ``BST_CYTHON_TRACE=1`` + to your environment if you also want coverage on cython files. You could then + get coverage by running:: + + BST_CYTHON_TRACE=1 coverage run ./setup.py test + + Note that to be able to run ``./setup.py test``, you will need to have ``Cython`` + installed. + .. tip:: We also have an environment called 'venv' which takes any arguments @@ -1737,6 +1746,12 @@ You can install it with `pip install snakeviz`. Here is an example invocation: It will then start a webserver and launch a browser to the relevant page. +.. note:: + + If you want to also profile cython files, you will need to add + BST_CYTHON_PROFILE=1 and recompile the cython files. + ``pip install`` would take care of that. + Profiling specific parts of BuildStream with BST_PROFILE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BuildStream can also turn on cProfile for specific parts of execution @@ -1753,6 +1768,32 @@ call most of `initialized`, for each element. These profile files are in the same cProfile format as those mentioned in the previous section, and can be analysed in the same way. +Fixing performance issues +~~~~~~~~~~~~~~~~~~~~~~~~~ + +BuildStream uses `Cython <https://cython.org/>`_ in order to speed up specific parts of the +code base. + +.. note:: + + When optimizing for performance, please ensure that you optimize the algorithms before + jumping into Cython code. Cython will make the code harder to maintain and less accessible + to all developers. + +When adding a new cython file to the codebase, you will need to register it in ``setup.py``. + +For example, for a module ``buildstream._my_module``, to be written in ``src/buildstream/_my_module.pyx``, you would do:: + + register_cython_module("buildstream._my_module") + +In ``setup.py`` and the build tool will automatically use your module. + +.. note:: + + Please register cython modules at the same place as the others. + +When adding a definition class to share cython symbols between modules, please document the implementation file +and only expose the required definitions. Managing data files ------------------- diff --git a/MANIFEST.in b/MANIFEST.in index d0de0e593..07369c481 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,6 +6,11 @@ include MAINTAINERS include NEWS include README.rst +# Cython files +recursive-include src/buildstream *.pyx +recursive-include src/buildstream *.pxd +recursive-include src/buildstream *.c + # Data files required by BuildStream's generic source tests recursive-include src/buildstream/testing/_sourcetests/project * @@ -43,3 +48,6 @@ include requirements/plugin-requirements.txt # Versioneer include versioneer.py + +# setuptools.build_meta don't include setup.py by default. Add it +include setup.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..4dd02d1e5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = [ + # We need at least version 36.6.0 that introduced "build_meta" + "setuptools>=36.6.0", + # In order to build wheels, and as required by PEP 517 + "wheel", + "Cython" +] +build-backend = "setuptools.build_meta" diff --git a/requirements/cov-requirements.in b/requirements/cov-requirements.in index 455b91ba6..fb582f816 100644 --- a/requirements/cov-requirements.in +++ b/requirements/cov-requirements.in @@ -1,2 +1,3 @@ coverage == 4.4.0 pytest-cov >= 2.5.0 +Cython diff --git a/requirements/cov-requirements.txt b/requirements/cov-requirements.txt index 843df85f3..a8ba7843b 100644 --- a/requirements/cov-requirements.txt +++ b/requirements/cov-requirements.txt @@ -1,4 +1,5 @@ coverage==4.4 +cython==0.29.7 pytest-cov==2.6.1 ## The following requirements were added by pip freeze: atomicwrites==1.3.0 @@ -17,15 +17,18 @@ # # Authors: # Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> +# Benjamin Schubert <bschubert15@bloomberg.net> import os +from pathlib import Path import re import shutil import subprocess import sys -import versioneer -from pathlib import Path +# Add local directory to the path, in order to be able to import versioneer +sys.path.append(os.path.dirname(__file__)) +import versioneer # noqa ################################################################## @@ -39,7 +42,7 @@ if sys.version_info[0] != REQUIRED_PYTHON_MAJOR or sys.version_info[1] < REQUIRE sys.exit(1) try: - from setuptools import setup, find_packages, Command + from setuptools import setup, find_packages, Command, Extension from setuptools.command.easy_install import ScriptWriter from setuptools.command.test import test as TestCommand except ImportError: @@ -303,6 +306,95 @@ with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), ##################################################### +# Setup Cython and extensions # +##################################################### +# We want to ensure that source distributions always +# include the .c files, in order to allow users to +# not need cython when building. +def assert_cython_required(): + if "sdist" not in sys.argv: + return + + print("Cython is required when building 'sdist' in order to " + "ensure source distributions can be built without Cython. " + "Please install it using your package manager (usually 'python3-cython') " + "or pip (pip install cython).", + file=sys.stderr) + + raise SystemExit(1) + + +extension_macros = [ + ("CYTHON_TRACE", os.environ.get("BST_CYTHON_TRACE", 0)) +] + + +def cythonize(extensions, **kwargs): + try: + from Cython.Build import cythonize as _cythonize + except ImportError: + assert_cython_required() + + print("Cython not found. Using preprocessed c files instead") + + missing_c_sources = [] + + for extension in extensions: + for source in extension.sources: + if source.endswith(".pyx"): + c_file = source.replace(".pyx", ".c") + + if not os.path.exists(c_file): + missing_c_sources.append((extension, c_file)) + + if missing_c_sources: + for extension, source in missing_c_sources: + print("Missing '{}' for building extension '{}'".format(source, extension.name)) + + raise SystemExit(1) + return extensions + + return _cythonize(extensions, **kwargs) + + +def register_cython_module(module_name, dependencies=None): + def files_from_module(modname): + basename = "src/{}".format(modname.replace(".", "/")) + return "{}.pyx".format(basename), "{}.pxd".format(basename) + + if dependencies is None: + dependencies = [] + + implementation_file, definition_file = files_from_module(module_name) + + assert os.path.exists(implementation_file) + + depends = [] + if os.path.exists(definition_file): + depends.append(definition_file) + + for module in dependencies: + imp_file, def_file = files_from_module(module) + assert os.path.exists(imp_file), "Dependency file not found: {}".format(imp_file) + assert os.path.exists(def_file), "Dependency declaration file not found: {}".format(def_file) + + depends.append(imp_file) + depends.append(def_file) + + BUILD_EXTENSIONS.append(Extension( + name=module_name, + sources=[implementation_file], + depends=depends, + define_macros=extension_macros, + )) + + +BUILD_EXTENSIONS = [] + +register_cython_module("buildstream._yaml") +register_cython_module("buildstream._variables", dependencies=["buildstream._yaml"]) + +##################################################### # Main setup() Invocation # ##################################################### setup(name='BuildStream', @@ -360,4 +452,16 @@ setup(name='BuildStream', install_requires=install_requires, entry_points=bst_install_entry_points, tests_require=dev_requires, + ext_modules=cythonize( + BUILD_EXTENSIONS, + compiler_directives={ + # Version of python to use + # https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#arguments + "language_level": "3", + # Enable line tracing, this is needed in order to generate coverage. + # This is not enabled unless the CYTHON_TRACE macro for distutils is defined. + "linetrace": True, + "profile": os.environ.get("BST_CYTHON_PROFILE", False), + } + ), zip_safe=False) diff --git a/src/buildstream/_variables.py b/src/buildstream/_variables.pyx index 74314cf1f..9b8b5a902 100644 --- a/src/buildstream/_variables.py +++ b/src/buildstream/_variables.pyx @@ -18,12 +18,13 @@ # Authors: # Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> # Daniel Silverstone <daniel.silverstone@codethink.co.uk> +# Benjamin Schubert <bschubert@bloomberg.net> import re import sys from ._exceptions import LoadError, LoadErrorReason -from . import _yaml +from . cimport _yaml # Variables are allowed to have dashes here # @@ -34,9 +35,8 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}") # These hold data structures called "expansion strings" and are the parsed # form of the strings which are the input to this subsystem. Strings # such as "Hello %{name}, how are you?" are parsed into the form: -# (3, ["Hello ", "name", ", how are you?"]) -# i.e. a tuple of an integer and a list, where the integer is the cached -# length of the list, and the list consists of one or more strings. +# ["Hello ", "name", ", how are you?"] +# i.e. a list which consists of one or more strings. # Strings in even indices of the list (0, 2, 4, etc) are constants which # are copied into the output of the expansion algorithm. Strings in the # odd indices (1, 3, 5, etc) are the names of further expansions to make. @@ -58,15 +58,18 @@ PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}") # variable settings for the element. # # Args: -# node (dict): A node loaded and composited with yaml tools +# node (Node): A node loaded and composited with yaml tools # # Raises: # LoadError, if unresolved variables, or cycles in resolution, occur. # -class Variables(): +cdef class Variables: - def __init__(self, node): + cdef _yaml.Node original + cdef dict _expstr_map + cdef public dict flat + def __init__(self, _yaml.Node node): self.original = node self._expstr_map = self._resolve(node) self.flat = self._flatten() @@ -84,7 +87,7 @@ class Variables(): # Raises: # LoadError, if the string contains unresolved variable references. # - def subst(self, string): + def subst(self, str string): expstr = _parse_expstr(string) try: @@ -93,7 +96,7 @@ class Variables(): unmatched = [] # Look for any unmatched variable names in the expansion string - for var in expstr[1][1::2]: + for var in expstr[1::2]: if var not in self._expstr_map: unmatched.append(var) @@ -112,17 +115,20 @@ class Variables(): # # Here we resolve all of our inputs into a dictionary, ready for use # in subst() - def _resolve(self, node): + cdef dict _resolve(self, _yaml.Node node): # Special case, if notparallel is specified in the variables for this # element, then override max-jobs to be 1. # Initialize it as a string as all variables are processed as strings. # - if _yaml.node_get(node, bool, 'notparallel', default_value=False): + if _yaml.node_get(node, bool, 'notparallel', None, False): _yaml.node_set(node, 'max-jobs', str(1)) - ret = {} - for key, value in _yaml.node_items(node): - value = _yaml.node_get(node, str, key) + cdef dict ret = {} + cdef str key + cdef str value + + for key in _yaml.node_keys(node): + value = <str> _yaml.node_get(node, str, key) ret[sys.intern(key)] = _parse_expstr(value) return ret @@ -130,7 +136,7 @@ class Variables(): # First the check for anything unresolvable summary = [] for key, expstr in self._expstr_map.items(): - for var in expstr[1][1::2]: + for var in expstr[1::2]: if var not in self._expstr_map: line = " unresolved variable '{unmatched}' in declaration of '{variable}' at: {provenance}" provenance = _yaml.node_get_provenance(self.original, key) @@ -142,7 +148,7 @@ class Variables(): def _check_for_cycles(self): # And now the cycle checks def cycle_check(expstr, visited, cleared): - for var in expstr[1][1::2]: + for var in expstr[1::2]: if var in cleared: continue if var in visited: @@ -169,14 +175,18 @@ class Variables(): # LoadError, if the string contains unresolved variable references or # if cycles are detected in the variable references # - def _flatten(self): - flat = {} + cdef dict _flatten(self): + cdef dict flat = {} + cdef str key + cdef list expstr + try: for key, expstr in self._expstr_map.items(): - if expstr[0] > 1: - expstr = (1, [sys.intern(_expand_expstr(self._expstr_map, expstr))]) + if len(expstr) > 1: + # FIXME: do we really gain anything by interning? + expstr = [sys.intern(_expand_expstr(self._expstr_map, expstr))] self._expstr_map[key] = expstr - flat[key] = expstr[1][0] + flat[key] = expstr[0] except KeyError: self._check_for_missing() raise @@ -190,19 +200,21 @@ class Variables(): # something which might "waste" memory, in reality each of these # will live as long as the element which uses it, which is the # vast majority of the memory usage across the execution of BuildStream. -PARSE_CACHE = { +cdef dict PARSE_CACHE = { # Prime the cache with the empty string since otherwise that can # cause issues with the parser, complications to which cause slowdown - "": (1, [""]), + "": [""], } # Helper to parse a string into an expansion string tuple, caching # the results so that future parse requests don't need to think about # the string -def _parse_expstr(instr): +cdef list _parse_expstr(str instr): + cdef list ret + try: - return PARSE_CACHE[instr] + return <list> PARSE_CACHE[instr] except KeyError: # This use of the regex turns a string like "foo %{bar} baz" into # a list ["foo ", "bar", " baz"] @@ -211,41 +223,61 @@ def _parse_expstr(instr): # which we can optimise away, making the expansion routines not need # a test for this. if splits[-1] == '': - splits = splits[:-1] + del splits [-1] # Cache an interned copy of this. We intern it to try and reduce the # memory impact of the cache. It seems odd to cache the list length # but this is measurably cheaper than calculating it each time during # string expansion. - PARSE_CACHE[instr] = (len(splits), [sys.intern(s) for s in splits]) - return PARSE_CACHE[instr] + ret = [sys.intern(<str> s) for s in <list> splits] + PARSE_CACHE[instr] = ret + return ret + + +# Helper to expand recursively an expansion string in the context +# of the given dictionary of expansion string +# +# Args: +# content (dict): dictionary context for resolving the variables +# value (list): expansion string to expand +# acc (list): list in which to add the result +# counter (int): counter to count the number of recursion in order to +# detect cycles. +# +# Raises: +# KeyError: if any expansion is missing +# RecursionError: if a variable is defined recursively +# +cdef void _expand_expstr_helper(dict content, list value, list acc, int counter = 0) except *: + cdef Py_ssize_t idx = 0 + cdef Py_ssize_t value_len = len(value) + + if counter > 1000: + raise RecursionError() + + while idx < value_len: + acc.append(value[idx]) + idx += 1 + + if idx < value_len: + _expand_expstr_helper(content, <list> content[value[idx]], acc, counter + 1) + + idx += 1 # Helper to expand a given top level expansion string tuple in the context # of the given dictionary of expansion strings. # # Note: Will raise KeyError if any expansion is missing -def _expand_expstr(content, topvalue): +cdef str _expand_expstr(dict content, list topvalue): # Short-circuit constant strings - if topvalue[0] == 1: - return topvalue[1][0] + if len(topvalue) == 1: + return <str> topvalue[0] # Short-circuit strings which are entirely an expansion of another variable # e.g. "%{another}" - if topvalue[0] == 2 and topvalue[1][0] == "": - return _expand_expstr(content, content[topvalue[1][1]]) - - # Otherwise process fully... - def internal_expand(value): - (expansion_len, expansion_bits) = value - idx = 0 - while idx < expansion_len: - # First yield any constant string content - yield expansion_bits[idx] - idx += 1 - # Now, if there is an expansion variable left to expand, yield - # the expansion of that variable too - if idx < expansion_len: - yield from internal_expand(content[expansion_bits[idx]]) - idx += 1 - - return "".join(internal_expand(topvalue)) + if len(topvalue) == 2 and len(<str> topvalue[0]) == 0: + return _expand_expstr(content, <list> content[topvalue[1]]) + + cdef list result = [] + _expand_expstr_helper(content, topvalue, result) + return "".join(result) diff --git a/src/buildstream/_yaml.pxd b/src/buildstream/_yaml.pxd new file mode 100644 index 000000000..27a1a888e --- /dev/null +++ b/src/buildstream/_yaml.pxd @@ -0,0 +1,44 @@ +# +# Copyright (C) 2019 Bloomberg L.P. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <http://www.gnu.org/licenses/>. +# +# Authors: +# Benjamin Schubert <bschubert@bloomberg.net> + +# Documentation for each class and method here can be found in the adjacent +# implementation file (_yaml.pyx) + +cdef class Node: + + cdef public object value + cdef public int file_index + cdef public int line + cdef public int column + + +cdef class ProvenanceInformation: + + cdef public Node node + cdef str displayname + cdef public str filename, shortname + cdef public int col, line + cdef public object project, toplevel + cdef public bint is_synthetic + + +cpdef object node_get(Node node, object expected_type, str key, list indices=*, object default_value=*, bint allow_none=*) +cpdef void node_set(Node node, object key, object value, list indices=*) except * +cpdef list node_keys(object node) +cpdef ProvenanceInformation node_get_provenance(Node node, str key=*, list indices=*) diff --git a/src/buildstream/_yaml.py b/src/buildstream/_yaml.pyx index cdab4269e..50f0c64a9 100644 --- a/src/buildstream/_yaml.py +++ b/src/buildstream/_yaml.pyx @@ -19,14 +19,14 @@ # Tristan Van Berkom <tristan.vanberkom@codethink.co.uk> # Daniel Silverstone <daniel.silverstone@codethink.co.uk> # James Ennis <james.ennis@codethink.co.uk> +# Benjamin Schubert <bschubert@bloomberg.net> import sys import string from contextlib import ExitStack -from collections import OrderedDict, namedtuple +from collections import OrderedDict from collections.abc import Mapping, Sequence from copy import deepcopy -from itertools import count from ruamel import yaml from ._exceptions import LoadError, LoadErrorReason @@ -55,50 +55,66 @@ from ._exceptions import LoadError, LoadErrorReason # line (int): The line number within the file where the value appears. # col (int): The column number within the file where the value appears. # -# For efficiency, each field should be accessed by its integer index: -# value = Node[0] -# file_index = Node[1] -# line = Node[2] -# column = Node[3] -# -class Node(namedtuple('Node', ['value', 'file_index', 'line', 'column'])): +cdef class Node: + + def __init__(self, object value, int file_index, int line, int column): + self.value = value + self.file_index = file_index + self.line = line + self.column = column + def __contains__(self, what): # Delegate to the inner value, though this will likely not work # very well if the node is a list or string, it's unlikely that # code which has access to such nodes would do this. - return what in self[0] + return what in self.value + + +# Metadata container for a yaml toplevel node. +# +# This class contains metadata around a yaml node in order to be able +# to trace back the provenance of a node to the file. +# +cdef class FileInfo: + + cdef str filename, shortname, displayname + cdef Node toplevel, + cdef object project + + def __init__(self, str filename, str shortname, str displayname, Node toplevel, object project): + self.filename = filename + self.shortname = shortname + self.displayname = displayname + self.toplevel = toplevel + self.project = project # File name handling -_FILE_LIST = [] +cdef _FILE_LIST = [] -# Purely synthetic node will have None for the file number, have line number +# Purely synthetic node will have _SYNTHETIC_FILE_INDEX for the file number, have line number # zero, and a negative column number which comes from inverting the next value # out of this counter. Synthetic nodes created with a reference node will # have a file number from the reference node, some unknown line number, and # a negative column number from this counter. -_SYNTHETIC_COUNTER = count(start=-1, step=-1) +cdef int _SYNTHETIC_FILE_INDEX = -1 +cdef int __counter = 0 + +cdef int next_synthetic_counter(): + global __counter + __counter -= 1 + return __counter # Returned from node_get_provenance -class ProvenanceInformation: - - __slots__ = ( - "filename", - "shortname", - "displayname", - "line", - "col", - "toplevel", - "node", - "project", - "is_synthetic", - ) - - def __init__(self, nodeish): +cdef class ProvenanceInformation: + + def __init__(self, Node nodeish): + cdef FileInfo fileinfo + self.node = nodeish - if (nodeish is None) or (nodeish[1] is None): + if (nodeish is None) or (nodeish.file_index is None): self.filename = "" self.shortname = "" self.displayname = "" @@ -107,15 +123,15 @@ class ProvenanceInformation: self.toplevel = None self.project = None else: - fileinfo = _FILE_LIST[nodeish[1]] - self.filename = fileinfo[0] - self.shortname = fileinfo[1] - self.displayname = fileinfo[2] + fileinfo = <FileInfo> _FILE_LIST[nodeish.file_index] + self.filename = fileinfo.filename + self.shortname = fileinfo.shortname + self.displayname = fileinfo.displayname # We add 1 here to convert from computerish to humanish - self.line = nodeish[2] + 1 - self.col = nodeish[3] - self.toplevel = fileinfo[3] - self.project = fileinfo[4] + self.line = nodeish.line + 1 + self.col = nodeish.column + self.toplevel = fileinfo.toplevel + self.project = fileinfo.project self.is_synthetic = (self.filename == '') or (self.col < 0) # Convert a Provenance to a string for error reporting @@ -140,6 +156,19 @@ class YAMLLoadError(Exception): pass +# Represents the various states in which the Representer can be +# while parsing yaml. +cdef enum RepresenterState: + doc + init + stream + wait_key + wait_list_item + wait_value + + +ctypedef RepresenterState (*representer_action)(Representer, object) + # Representer for YAML events comprising input to the BuildStream format. # # All streams MUST represent a single document which must be a Mapping. @@ -148,13 +177,11 @@ class YAMLLoadError(Exception): # Mappings must only have string keys, values are always represented as # strings if they are scalar, or else as simple dictionaries and lists. # -class Representer: - __slots__ = ( - "_file_index", - "state", - "output", - "keys", - ) +cdef class Representer: + + cdef int _file_index + cdef RepresenterState state + cdef list output, keys # Initialise a new representer # @@ -163,9 +190,9 @@ class Representer: # # Args: # file_index (int): The index of this YAML file - def __init__(self, file_index): + def __init__(self, int file_index): self._file_index = file_index - self.state = "init" + self.state = RepresenterState.init self.output = [] self.keys = [] @@ -176,12 +203,14 @@ class Representer: # # Raises: # YAMLLoadError: Something went wrong. - def handle_event(self, event): + cdef void handle_event(self, event) except *: if getattr(event, "anchor", None) is not None: raise YAMLLoadError("Anchors are disallowed in BuildStream at line {} column {}" .format(event.start_mark.line, event.start_mark.column)) - if event.__class__.__name__ == "ScalarEvent": + cdef str event_name = event.__class__.__name__ + + if event_name == "ScalarEvent": if event.tag is not None: if not event.tag.startswith("tag:yaml.org,2002:"): raise YAMLLoadError( @@ -189,104 +218,139 @@ class Representer: "This is disallowed in BuildStream. At line {} column {}" .format(event.start_mark.line, event.start_mark.column)) - handler = "_handle_{}_{}".format(self.state, event.__class__.__name__) - handler = getattr(self, handler, None) - if handler is None: + cdef representer_action handler = self._get_handler_for_event(event_name) + if not handler: raise YAMLLoadError( "Invalid input detected. No handler for {} in state {} at line {} column {}" .format(event, self.state, event.start_mark.line, event.start_mark.column)) - self.state = handler(event) # pylint: disable=not-callable + # Cython weirdness here, we need to pass self to the function + self.state = <RepresenterState> handler(self, event) # pylint: disable=not-callable # Get the output of the YAML parse # # Returns: # (Node or None): Return the Node instance of the top level mapping or # None if there wasn't one. - def get_output(self): - try: + cdef Node get_output(self): + if len(self.output): return self.output[0] - except IndexError: - return None - - def _handle_init_StreamStartEvent(self, ev): - return "stream" - - def _handle_stream_DocumentStartEvent(self, ev): - return "doc" + return None - def _handle_doc_MappingStartEvent(self, ev): + cdef representer_action _get_handler_for_event(self, str event_name): + if self.state == RepresenterState.wait_list_item: + if event_name == "ScalarEvent": + return self._handle_wait_list_item_ScalarEvent + elif event_name == "MappingStartEvent": + return self._handle_wait_list_item_MappingStartEvent + elif event_name == "SequenceStartEvent": + return self._handle_wait_list_item_SequenceStartEvent + elif event_name == "SequenceEndEvent": + return self._handle_wait_list_item_SequenceEndEvent + elif self.state == RepresenterState.wait_value: + if event_name == "ScalarEvent": + return self._handle_wait_value_ScalarEvent + elif event_name == "MappingStartEvent": + return self._handle_wait_value_MappingStartEvent + elif event_name == "SequenceStartEvent": + return self._handle_wait_value_SequenceStartEvent + elif self.state == RepresenterState.wait_key: + if event_name == "ScalarEvent": + return self._handle_wait_key_ScalarEvent + elif event_name == "MappingEndEvent": + return self._handle_wait_key_MappingEndEvent + elif self.state == RepresenterState.stream: + if event_name == "DocumentStartEvent": + return self._handle_stream_DocumentStartEvent + elif event_name == "StreamEndEvent": + return self._handle_stream_StreamEndEvent + elif self.state == RepresenterState.doc: + if event_name == "MappingStartEvent": + return self._handle_doc_MappingStartEvent + elif event_name == "DocumentEndEvent": + return self._handle_doc_DocumentEndEvent + elif self.state == RepresenterState.init and event_name == "StreamStartEvent": + return self._handle_init_StreamStartEvent + return NULL + + cdef RepresenterState _handle_init_StreamStartEvent(self, object ev): + return RepresenterState.stream + + cdef RepresenterState _handle_stream_DocumentStartEvent(self, object ev): + return RepresenterState.doc + + cdef RepresenterState _handle_doc_MappingStartEvent(self, object ev): newmap = Node({}, self._file_index, ev.start_mark.line, ev.start_mark.column) self.output.append(newmap) - return "wait_key" + return RepresenterState.wait_key - def _handle_wait_key_ScalarEvent(self, ev): + cdef RepresenterState _handle_wait_key_ScalarEvent(self, object ev): self.keys.append(ev.value) - return "wait_value" + return RepresenterState.wait_value - def _handle_wait_value_ScalarEvent(self, ev): + cdef RepresenterState _handle_wait_value_ScalarEvent(self, object ev): key = self.keys.pop() - self.output[-1][0][key] = \ + (<dict> (<Node> self.output[-1]).value)[key] = \ Node(ev.value, self._file_index, ev.start_mark.line, ev.start_mark.column) - return "wait_key" + return RepresenterState.wait_key - def _handle_wait_value_MappingStartEvent(self, ev): - new_state = self._handle_doc_MappingStartEvent(ev) + cdef RepresenterState _handle_wait_value_MappingStartEvent(self, object ev): + cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev) key = self.keys.pop() - self.output[-2][0][key] = self.output[-1] + (<dict> (<Node> self.output[-2]).value)[key] = self.output[-1] return new_state - def _handle_wait_key_MappingEndEvent(self, ev): + cdef RepresenterState _handle_wait_key_MappingEndEvent(self, object ev): # We've finished a mapping, so pop it off the output stack # unless it's the last one in which case we leave it if len(self.output) > 1: self.output.pop() - if type(self.output[-1][0]) is list: - return "wait_list_item" + if type((<Node> self.output[-1]).value) is list: + return RepresenterState.wait_list_item else: - return "wait_key" + return RepresenterState.wait_key else: - return "doc" + return RepresenterState.doc - def _handle_wait_value_SequenceStartEvent(self, ev): + cdef RepresenterState _handle_wait_value_SequenceStartEvent(self, object ev): self.output.append(Node([], self._file_index, ev.start_mark.line, ev.start_mark.column)) - self.output[-2][0][self.keys[-1]] = self.output[-1] - return "wait_list_item" + (<dict> (<Node> self.output[-2]).value)[self.keys[-1]] = self.output[-1] + return RepresenterState.wait_list_item - def _handle_wait_list_item_SequenceStartEvent(self, ev): - self.keys.append(len(self.output[-1][0])) + cdef RepresenterState _handle_wait_list_item_SequenceStartEvent(self, object ev): + self.keys.append(len((<Node> self.output[-1]).value)) self.output.append(Node([], self._file_index, ev.start_mark.line, ev.start_mark.column)) - self.output[-2][0].append(self.output[-1]) - return "wait_list_item" + (<list> (<Node> self.output[-2]).value).append(self.output[-1]) + return RepresenterState.wait_list_item - def _handle_wait_list_item_SequenceEndEvent(self, ev): + cdef RepresenterState _handle_wait_list_item_SequenceEndEvent(self, object ev): # When ending a sequence, we need to pop a key because we retain the # key until the end so that if we need to mutate the underlying entry # we can. key = self.keys.pop() self.output.pop() if type(key) is int: - return "wait_list_item" + return RepresenterState.wait_list_item else: - return "wait_key" + return RepresenterState.wait_key - def _handle_wait_list_item_ScalarEvent(self, ev): - self.output[-1][0].append( + cdef RepresenterState _handle_wait_list_item_ScalarEvent(self, object ev): + (<Node> self.output[-1]).value.append( Node(ev.value, self._file_index, ev.start_mark.line, ev.start_mark.column)) - return "wait_list_item" + return RepresenterState.wait_list_item - def _handle_wait_list_item_MappingStartEvent(self, ev): - new_state = self._handle_doc_MappingStartEvent(ev) - self.output[-2][0].append(self.output[-1]) + cdef RepresenterState _handle_wait_list_item_MappingStartEvent(self, object ev): + cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev) + (<list> (<Node> self.output[-2]).value).append(self.output[-1]) return new_state - def _handle_doc_DocumentEndEvent(self, ev): + cdef RepresenterState _handle_doc_DocumentEndEvent(self, object ev): if len(self.output) != 1: raise YAMLLoadError("Zero, or more than one document found in YAML stream") - return "stream" + return RepresenterState.stream - def _handle_stream_StreamEndEvent(self, ev): - return "init" + cdef RepresenterState _handle_stream_StreamEndEvent(self, object ev): + return RepresenterState.init # Loads a dictionary from some YAML @@ -302,17 +366,20 @@ class Representer: # # Raises: LoadError # -def load(filename, shortname=None, copy_tree=False, *, project=None): +cpdef Node load(str filename, str shortname=None, bint copy_tree=False, object project=None): if not shortname: shortname = filename + cdef str displayname if (project is not None) and (project.junction is not None): displayname = "{}:{}".format(project.junction.name, shortname) else: displayname = shortname - file_number = len(_FILE_LIST) - _FILE_LIST.append((filename, shortname, displayname, None, project)) + cdef Py_ssize_t file_number = len(_FILE_LIST) + _FILE_LIST.append(FileInfo(filename, shortname, displayname, None, project)) + + cdef Node data try: with open(filename) as f: @@ -337,12 +404,20 @@ def load(filename, shortname=None, copy_tree=False, *, project=None): # Like load(), but doesnt require the data to be in a file # -def load_data(data, file_index=None, file_name=None, copy_tree=False): +cpdef Node load_data(str data, int file_index=_SYNTHETIC_FILE_INDEX, str file_name=None, bint copy_tree=False): + cdef Representer rep + cdef FileInfo f_info try: rep = Representer(file_index) - for event in yaml.parse(data, Loader=yaml.CBaseLoader): - rep.handle_event(event) + parser = yaml.CParser(data) + + try: + while parser.check_event(): + rep.handle_event(parser.get_event()) + finally: + parser.dispose() + contents = rep.get_output() except YAMLLoadError as e: raise LoadError(LoadErrorReason.INVALID_YAML, @@ -351,7 +426,7 @@ def load_data(data, file_index=None, file_name=None, copy_tree=False): raise LoadError(LoadErrorReason.INVALID_YAML, "Severely malformed YAML:\n\n{}\n\n".format(e)) from e - if not isinstance(contents, tuple) or not isinstance(contents[0], dict): + if type(contents) != Node: # Special case allowance for None, when the loaded file has only comments in it. if contents is None: contents = Node({}, file_index, 0, 0) @@ -362,12 +437,14 @@ def load_data(data, file_index=None, file_name=None, copy_tree=False): # Store this away because we'll use it later for "top level" provenance if file_index is not None: - _FILE_LIST[file_index] = ( - _FILE_LIST[file_index][0], # Filename - _FILE_LIST[file_index][1], # Shortname - _FILE_LIST[file_index][2], # Displayname + f_info = <FileInfo> _FILE_LIST[file_index] + + _FILE_LIST[file_index] = FileInfo( + f_info.filename, + f_info.shortname, + f_info.displayname, contents, - _FILE_LIST[file_index][4], # Project + f_info.project, ) if copy_tree: @@ -386,7 +463,7 @@ def load_data(data, file_index=None, file_name=None, copy_tree=False): # Args: # contents (any): Content to write out # filename (str): The (optional) file name to write out to -def dump(contents, filename=None): +def dump(object contents, str filename=None): roundtrip_dump(node_sanitize(contents), file=filename) @@ -395,25 +472,25 @@ def dump(contents, filename=None): # Gets the provenance for a node # # Args: -# node (dict): a dictionary +# node (Node): a dictionary # key (str): key in the dictionary # indices (list of indexes): Index path, in the case of list values # # Returns: The Provenance of the dict, member or list element # -def node_get_provenance(node, key=None, indices=None): - assert is_node(node) +cpdef ProvenanceInformation node_get_provenance(Node node, str key=None, list indices=None): + assert type(node.value) is dict if key is None: # Retrieving the provenance for this node directly return ProvenanceInformation(node) if key and not indices: - return ProvenanceInformation(node[0].get(key)) + return ProvenanceInformation(node.value.get(key)) - nodeish = node[0].get(key) + cdef Node nodeish = <Node> node.value.get(key) for idx in indices: - nodeish = nodeish[0][idx] + nodeish = <Node> nodeish.value[idx] return ProvenanceInformation(nodeish) @@ -446,57 +523,54 @@ _sentinel = object() # Note: # Returned strings are stripped of leading and trailing whitespace # -def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, allow_none=False): - assert type(node) is Node - +cpdef object node_get(Node node, object expected_type, str key, list indices=None, object default_value=_sentinel, bint allow_none=False): if indices is None: if default_value is _sentinel: - value = node[0].get(key, Node(default_value, None, 0, 0)) + value = node.value.get(key, Node(default_value, _SYNTHETIC_FILE_INDEX, 0, 0)) else: - value = node[0].get(key, Node(default_value, None, 0, next(_SYNTHETIC_COUNTER))) + value = node.value.get(key, Node(default_value, _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter())) - if value[0] is _sentinel: + if value.value is _sentinel: provenance = node_get_provenance(node) raise LoadError(LoadErrorReason.INVALID_DATA, "{}: Dictionary did not contain expected key '{}'".format(provenance, key)) else: # Implied type check of the element itself # No need to synthesise useful node content as we destructure it immediately - value = Node(node_get(node, list, key), None, 0, 0) + value = Node(node_get(node, list, key), _SYNTHETIC_FILE_INDEX, 0, 0) for index in indices: - value = value[0][index] + value = value.value[index] if type(value) is not Node: - value = (value,) + value = Node(value, _SYNTHETIC_FILE_INDEX, 0, 0) # Optionally allow None as a valid value for any type - if value[0] is None and (allow_none or default_value is None): + if value.value is None and (allow_none or default_value is None): return None - if (expected_type is not None) and (not isinstance(value[0], expected_type)): + if (expected_type is not None) and (not isinstance(value.value, expected_type)): # Attempt basic conversions if possible, typically we want to # be able to specify numeric values and convert them to strings, # but we dont want to try converting dicts/lists try: - if (expected_type == bool and isinstance(value[0], str)): + if (expected_type == bool and isinstance(value.value, str)): # Dont coerce booleans to string, this makes "False" strings evaluate to True # We don't structure into full nodes since there's no need. - if value[0] in ('True', 'true'): - value = (True, None, 0, 0) - elif value[0] in ('False', 'false'): - value = (False, None, 0, 0) + if value.value in ('True', 'true'): + value = Node(True, _SYNTHETIC_FILE_INDEX, 0, 0) + elif value.value in ('False', 'false'): + value = Node(False, _SYNTHETIC_FILE_INDEX, 0, 0) else: raise ValueError() elif not (expected_type == list or expected_type == dict or - isinstance(value[0], (list, dict))): - value = (expected_type(value[0]), None, 0, 0) + isinstance(value.value, (list, dict))): + value = Node(expected_type(value.value), _SYNTHETIC_FILE_INDEX, 0, 0) else: raise ValueError() except (ValueError, TypeError): provenance = node_get_provenance(node, key=key, indices=indices) if indices: - path = [key] - path.extend("[{:d}]".format(i) for i in indices) + path = [key, *["[{:d}]".format(i) for i in indices]] path = "".join(path) else: path = key @@ -505,8 +579,8 @@ def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, .format(provenance, path, expected_type.__name__)) # Now collapse lists, and scalars, to their value, leaving nodes as-is - if type(value[0]) is not dict: - value = value[0] + if type(value.value) is not dict: + value = value.value # Trim it at the bud, let all loaded strings from yaml be stripped of whitespace if type(value) is str: @@ -520,17 +594,17 @@ def node_get(node, expected_type, key, indices=None, *, default_value=_sentinel, return value -def __trim_list_provenance(value): - ret = [] +cdef list __trim_list_provenance(list value): + cdef list ret = [] for entry in value: if type(entry) is not Node: - entry = (entry, None, 0, 0) - if type(entry[0]) is list: - ret.append(__trim_list_provenance(entry[0])) - elif type(entry[0]) is dict: + entry = Node(entry, _SYNTHETIC_FILE_INDEX, 0, 0) + if type(entry.value) is list: + ret.append(__trim_list_provenance(entry.value)) + elif type(entry.value) is dict: ret.append(entry) else: - ret.append(entry[0]) + ret.append(entry.value) return ret @@ -541,30 +615,32 @@ def __trim_list_provenance(value): # create entries before using `node_set` # # Args: -# node (tuple): The node +# node (Node): The node # key (str): The key name # value: The value # indices: Any indices to index into the list referenced by key, like in # `node_get` (must be a list of integers) # -def node_set(node, key, value, indices=None): +cpdef void node_set(Node node, object key, object value, list indices=None) except *: + cdef int idx + if indices: - node = node[0][key] + node = <Node> (<dict> node.value)[key] key = indices.pop() for idx in indices: - node = node[0][idx] + node = <Node> (<list> node.value)[idx] if type(value) is Node: - node[0][key] = value + node.value[key] = value else: try: # Need to do this just in case we're modifying a list - old_value = node[0][key] + old_value = <Node> node.value[key] except KeyError: old_value = None if old_value is None: - node[0][key] = Node(value, node[1], node[2], next(_SYNTHETIC_COUNTER)) + node.value[key] = Node(value, node.file_index, node.line, next_synthetic_counter()) else: - node[0][key] = Node(value, old_value[1], old_value[2], old_value[3]) + node.value[key] = Node(value, old_value.file_index, old_value.line, old_value.column) # node_extend_list() @@ -582,23 +658,21 @@ def node_set(node, key, value, indices=None): # key (str): The list name in the node # length (int): The length to extend the list to # default (any): The default value to extend with. -def node_extend_list(node, key, length, default): +def node_extend_list(Node node, str key, Py_ssize_t length, object default): assert type(default) is str or default in ([], {}) - list_node = node[0].get(key) + cdef Node list_node = <Node> node.value.get(key) if list_node is None: - list_node = node[0][key] = Node([], node[1], node[2], next(_SYNTHETIC_COUNTER)) + list_node = node.value[key] = Node([], node.file_index, node.line, next_synthetic_counter()) - assert type(list_node[0]) is list - - the_list = list_node[0] + cdef list the_list = list_node.value def_type = type(default) - file_index = node[1] + file_index = node.file_index if the_list: line_num = the_list[-1][2] else: - line_num = list_node[2] + line_num = list_node.line while length > len(the_list): if def_type is str: @@ -610,7 +684,7 @@ def node_extend_list(node, key, length, default): line_num += 1 - the_list.append(Node(value, file_index, line_num, next(_SYNTHETIC_COUNTER))) + the_list.append(Node(value, file_index, line_num, next_synthetic_counter())) # node_items() @@ -627,16 +701,19 @@ def node_extend_list(node, key, length, default): # def node_items(node): if type(node) is not Node: - node = Node(node, None, 0, 0) - for key, value in node[0].items(): + node = Node(node, _SYNTHETIC_FILE_INDEX, 0, 0) + + cdef str key + + for key, value in node.value.items(): if type(value) is not Node: - value = Node(value, None, 0, 0) - if type(value[0]) is dict: + value = Node(value, _SYNTHETIC_FILE_INDEX, 0, 0) + if type(value.value) is dict: yield (key, value) - elif type(value[0]) is list: - yield (key, __trim_list_provenance(value[0])) + elif type(value.value) is list: + yield (key, __trim_list_provenance(value.value)) else: - yield (key, value[0]) + yield (key, value.value) # node_keys() @@ -650,10 +727,10 @@ def node_items(node): # Yields: # (str): The key name # -def node_keys(node): - if type(node) is not Node: - node = Node(node, None, 0, 0) - yield from node[0].keys() +cpdef list node_keys(object node): + if type(node) is Node: + return list((<Node> node).value.keys()) + return list(node.keys()) # node_del() @@ -666,9 +743,9 @@ def node_keys(node): # key (str): The key we want to remove # safe (bool): Whether to raise a KeyError if unable # -def node_del(node, key, safe=False): +def node_del(Node node, str key, bint safe=False): try: - del node[0][key] + del node.value[key] except KeyError: if not safe: raise @@ -689,7 +766,7 @@ def node_del(node, key, safe=False): def is_node(maybenode): # It's a programming error to give this a Node which isn't a mapping # so assert that. - assert (type(maybenode) is not Node) or (type(maybenode[0]) is dict) + assert (type(maybenode) is not Node) or (type(maybenode.value) is dict) # Now return the type check return type(maybenode) is Node @@ -708,10 +785,11 @@ def is_node(maybenode): # (Node): An empty YAML mapping node, whose provenance is to this new # synthetic file # -def new_synthetic_file(filename, project=None): - file_index = len(_FILE_LIST) - node = Node({}, file_index, 0, 0) - _FILE_LIST.append((filename, +def new_synthetic_file(str filename, object project=None): + cdef Py_ssize_t file_index = len(_FILE_LIST) + cdef Node node = Node({}, file_index, 0, 0) + + _FILE_LIST.append(FileInfo(filename, filename, "<synthetic {}>".format(filename), node, @@ -727,11 +805,11 @@ def new_synthetic_file(filename, project=None): # Returns # (Node): A new empty YAML mapping node # -def new_empty_node(ref_node=None): +def new_empty_node(Node ref_node=None): if ref_node is not None: - return Node({}, ref_node[1], ref_node[2], next(_SYNTHETIC_COUNTER)) + return Node({}, ref_node.file_index, ref_node.line, next_synthetic_counter()) else: - return Node({}, None, 0, 0) + return Node({}, _SYNTHETIC_FILE_INDEX, 0, 0) # new_node_from_dict() @@ -742,8 +820,9 @@ def new_empty_node(ref_node=None): # Returns: # (Node): A new synthetic YAML tree which represents this dictionary # -def new_node_from_dict(indict): - ret = {} +cpdef Node new_node_from_dict(dict indict): + cdef dict ret = {} + cdef str k for k, v in indict.items(): vtype = type(v) if vtype is dict: @@ -751,13 +830,13 @@ def new_node_from_dict(indict): elif vtype is list: ret[k] = __new_node_from_list(v) else: - ret[k] = Node(str(v), None, 0, next(_SYNTHETIC_COUNTER)) - return Node(ret, None, 0, next(_SYNTHETIC_COUNTER)) + ret[k] = Node(str(v), _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter()) + return Node(ret, _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter()) # Internal function to help new_node_from_dict() to handle lists -def __new_node_from_list(inlist): - ret = [] +cdef Node __new_node_from_list(list inlist): + cdef list ret = [] for v in inlist: vtype = type(v) if vtype is dict: @@ -765,8 +844,8 @@ def __new_node_from_list(inlist): elif vtype is list: ret.append(__new_node_from_list(v)) else: - ret.append(Node(str(v), None, 0, next(_SYNTHETIC_COUNTER))) - return Node(ret, None, 0, next(_SYNTHETIC_COUNTER)) + ret.append(Node(str(v), _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter())) + return Node(ret, _SYNTHETIC_FILE_INDEX, 0, next_synthetic_counter()) # _is_composite_list @@ -785,13 +864,13 @@ def __new_node_from_list(inlist): # (LoadError): If node was a mapping and contained a mix of # list composition directives and other keys # -def _is_composite_list(node): +cdef bint _is_composite_list(Node node): + cdef bint has_directives = False + cdef bint has_keys = False + cdef str key - if type(node[0]) is dict: - has_directives = False - has_keys = False - - for key, _ in node_items(node): + if type(node.value) is dict: + for key in node_keys(node): if key in ['(>)', '(<)', '(=)']: # pylint: disable=simplifiable-if-statement has_directives = True else: @@ -816,36 +895,36 @@ def _is_composite_list(node): # target (Node): A composite list # source (Node): A composite list # -def _compose_composite_list(target, source): - clobber = source[0].get("(=)") - prefix = source[0].get("(<)") - suffix = source[0].get("(>)") +cdef void _compose_composite_list(Node target, Node source): + clobber = source.value.get("(=)") + prefix = source.value.get("(<)") + suffix = source.value.get("(>)") if clobber is not None: # We want to clobber the target list # which basically means replacing the target list # with ourselves - target[0]["(=)"] = clobber + target.value["(=)"] = clobber if prefix is not None: - target[0]["(<)"] = prefix - elif "(<)" in target[0]: - target[0]["(<)"][0].clear() + target.value["(<)"] = prefix + elif "(<)" in target.value: + target.value["(<)"].value.clear() if suffix is not None: - target[0]["(>)"] = suffix - elif "(>)" in target[0]: - target[0]["(>)"][0].clear() + target.value["(>)"] = suffix + elif "(>)" in target.value: + target.value["(>)"].value.clear() else: # Not clobbering, so prefix the prefix and suffix the suffix if prefix is not None: - if "(<)" in target[0]: - for v in reversed(prefix[0]): - target[0]["(<)"][0].insert(0, v) + if "(<)" in target.value: + for v in reversed(prefix.value): + target.value["(<)"].value.insert(0, v) else: - target[0]["(<)"] = prefix + target.value["(<)"] = prefix if suffix is not None: - if "(>)" in target[0]: - target[0]["(>)"][0].extend(suffix[0]) + if "(>)" in target.value: + target.value["(>)"].value.extend(suffix.value) else: - target[0]["(>)"] = suffix + target.value["(>)"] = suffix # _compose_list() @@ -857,18 +936,18 @@ def _compose_composite_list(target, source): # target (Node): The target list to be composed into # source (Node): The composition list to be composed from # -def _compose_list(target, source): - clobber = source[0].get("(=)") - prefix = source[0].get("(<)") - suffix = source[0].get("(>)") +cdef void _compose_list(Node target, Node source): + clobber = source.value.get("(=)") + prefix = source.value.get("(<)") + suffix = source.value.get("(>)") if clobber is not None: - target[0].clear() - target[0].extend(clobber[0]) + target.value.clear() + target.value.extend(clobber.value) if prefix is not None: - for v in reversed(prefix[0]): - target[0].insert(0, v) + for v in reversed(prefix.value): + target.value.insert(0, v) if suffix is not None: - target[0].extend(suffix[0]) + target.value.extend(suffix.value) # composite_dict() @@ -882,16 +961,19 @@ def _compose_list(target, source): # # Raises: CompositeError # -def composite_dict(target, source, path=None): +cpdef void composite_dict(Node target, Node source, list path=None) except *: + cdef str k + cdef Node v, target_value + if path is None: path = [] - for k, v in source[0].items(): + for k, v in source.value.items(): path.append(k) - if type(v[0]) is list: + if type(v.value) is list: # List clobbers anything list-like - target_value = target[0].get(k) + target_value = target.value.get(k) if not (target_value is None or - type(target_value[0]) is list or + type(target_value.value) is list or _is_composite_list(target_value)): raise CompositeError(path, "{}: List cannot overwrite {} at: {}" @@ -899,51 +981,51 @@ def composite_dict(target, source, path=None): k, node_get_provenance(target, k))) # Looks good, clobber it - target[0][k] = v + target.value[k] = v elif _is_composite_list(v): - if k not in target[0]: + if k not in target.value: # Composite list clobbers empty space - target[0][k] = v - elif type(target[0][k][0]) is list: + target.value[k] = v + elif type(target.value[k].value) is list: # Composite list composes into a list - _compose_list(target[0][k], v) - elif _is_composite_list(target[0][k]): + _compose_list(target.value[k], v) + elif _is_composite_list(target.value[k]): # Composite list merges into composite list - _compose_composite_list(target[0][k], v) + _compose_composite_list(target.value[k], v) else: # Else composing on top of normal dict or a scalar, so raise... raise CompositeError(path, "{}: Cannot compose lists onto {}".format( node_get_provenance(v), - node_get_provenance(target[0][k]))) - elif type(v[0]) is dict: + node_get_provenance(target.value[k]))) + elif type(v.value) is dict: # We're composing a dict into target now - if k not in target[0]: + if k not in target.value: # Target lacks a dict at that point, make a fresh one with # the same provenance as the incoming dict - target[0][k] = Node({}, v[1], v[2], v[3]) - if type(target[0]) is not dict: + target.value[k] = Node({}, v.file_index, v.line, v.column) + if type(target.value) is not dict: raise CompositeError(path, "{}: Cannot compose dictionary onto {}".format( node_get_provenance(v), - node_get_provenance(target[0][k]))) - composite_dict(target[0][k], v, path) + node_get_provenance(target.value[k]))) + composite_dict(target.value[k], v, path) else: - target_value = target[0].get(k) - if target_value is not None and type(target_value[0]) is not str: + target_value = target.value.get(k) + if target_value is not None and type(target_value.value) is not str: raise CompositeError(path, "{}: Cannot compose scalar on non-scalar at {}".format( node_get_provenance(v), - node_get_provenance(target[0][k]))) - target[0][k] = v + node_get_provenance(target.value[k]))) + target.value[k] = v path.pop() # Like composite_dict(), but raises an all purpose LoadError for convenience # -def composite(target, source): - assert type(source[0]) is dict - assert type(target[0]) is dict +cpdef void composite(Node target, Node source) except *: + assert type(source.value) is dict + assert type(target.value) is dict try: composite_dict(target, source) @@ -961,14 +1043,16 @@ def composite(target, source): # Like composite(target, source), but where target overrides source instead. # -def composite_and_move(target, source): +def composite_and_move(Node target, Node source): composite(source, target) - to_delete = [key for key in target[0].keys() if key not in source[0]] - for key, value in source[0].items(): - target[0][key] = value + cdef str key + cdef Node value + cdef list to_delete = [key for key in target.value.keys() if key not in source.value] + for key, value in source.value.items(): + target.value[key] = value for key in to_delete: - del target[0][key] + del target.value[key] # Types we can short-circuit in node_sanitize for speed. @@ -982,12 +1066,12 @@ __SANITIZE_SHORT_CIRCUIT_TYPES = (int, float, str, bool) # # Only dicts are ordered, list elements are left in order. # -def node_sanitize(node, *, dict_type=OrderedDict): +cpdef object node_sanitize(object node, object dict_type=OrderedDict): node_type = type(node) # If we have an unwrappable node, unwrap it if node_type is Node: - node = node[0] + node = node.value node_type = type(node) # Short-circuit None which occurs ca. twice per element @@ -1015,7 +1099,7 @@ def node_sanitize(node, *, dict_type=OrderedDict): # Sometimes we're handed tuples and we can't be sure what they contain # so we have to sanitize into them elif node_type is tuple: - return tuple((node_sanitize(v, dict_type=dict_type) for v in node)) + return tuple([node_sanitize(v, dict_type=dict_type) for v in node]) # Everything else just gets returned as-is. return node @@ -1028,18 +1112,18 @@ def node_sanitize(node, *, dict_type=OrderedDict): # means a typo which would otherwise not trigger an error). # # Args: -# node (dict): A dictionary loaded from YAML +# node (Node): A dictionary loaded from YAML # valid_keys (list): A list of valid keys for the specified node # # Raises: # LoadError: In the case that the specified node contained # one or more invalid keys # -def node_validate(node, valid_keys): +def node_validate(Node node, list valid_keys): # Probably the fastest way to do this: https://stackoverflow.com/a/23062482 - valid_keys = set(valid_keys) - invalid = next((key for key in node[0] if key not in valid_keys), None) + cdef set valid_keys_set = set(valid_keys) + invalid = next((key for key in node.value if key not in valid_keys_set), None) if invalid: provenance = node_get_provenance(node, key=invalid) @@ -1075,10 +1159,13 @@ __NODE_ASSERT_COMPOSITION_DIRECTIVES = ('(>)', '(<)', '(=)') # Returns: # (Node): A deep copy of source with provenance preserved. # -def node_copy(source): - copy = {} - for key, value in source[0].items(): - value_type = type(value[0]) +cpdef Node node_copy(Node source): + cdef dict copy = {} + cdef str key + cdef Node value + + for key, value in source.value.items(): + value_type = type(value.value) if value_type is dict: copy[key] = node_copy(value) elif value_type is list: @@ -1088,14 +1175,18 @@ def node_copy(source): else: raise ValueError("Unable to be quick about node_copy of {}".format(value_type)) - return Node(copy, source[1], source[2], source[3]) + return Node(copy, source.file_index, source.line, source.column) # Internal function to help node_copy() but for lists. -def _list_copy(source): - copy = [] - for item in source[0]: - item_type = type(item[0]) +cdef Node _list_copy(Node source): + cdef list copy = [] + for item in source.value: + if type(item) is Node: + item_type = type(item.value) + else: + item_type = type(item) + if item_type is dict: copy.append(node_copy(item)) elif item_type is list: @@ -1105,7 +1196,7 @@ def _list_copy(source): else: raise ValueError("Unable to be quick about list_copy of {}".format(item_type)) - return Node(copy, source[1], source[2], source[3]) + return Node(copy, source.file_index, source.line, source.column) # node_final_assertions() @@ -1119,10 +1210,11 @@ def _list_copy(source): # Raises: # (LoadError): If any assertions fail # -def node_final_assertions(node): - assert type(node) is Node +cpdef void node_final_assertions(Node node) except *: + cdef str key + cdef Node value - for key, value in node[0].items(): + for key, value in node.value.items(): # Assert that list composition directives dont remain, this # indicates that the user intended to override a list which @@ -1133,7 +1225,7 @@ def node_final_assertions(node): raise LoadError(LoadErrorReason.TRAILING_LIST_DIRECTIVE, "{}: Attempt to override non-existing list".format(provenance)) - value_type = type(value[0]) + value_type = type(value.value) if value_type is dict: node_final_assertions(value) @@ -1142,9 +1234,9 @@ def node_final_assertions(node): # Helper function for node_final_assertions(), but for lists. -def _list_final_assertions(values): - for value in values[0]: - value_type = type(value[0]) +def _list_final_assertions(Node values): + for value in values.value: + value_type = type(value.value) if value_type is dict: node_final_assertions(value) @@ -1170,12 +1262,12 @@ def _list_final_assertions(values): # Note that dashes are generally preferred for variable names and # usage in YAML, but things such as option names which will be # evaluated with jinja2 cannot use dashes. -def assert_symbol_name(provenance, symbol_name, purpose, *, allow_dashes=True): - valid_chars = string.digits + string.ascii_letters + '_' +def assert_symbol_name(ProvenanceInformation provenance, str symbol_name, str purpose, *, bint allow_dashes=True): + cdef str valid_chars = string.digits + string.ascii_letters + '_' if allow_dashes: valid_chars += '-' - valid = True + cdef bint valid = True if not symbol_name: valid = False elif any(x not in valid_chars for x in symbol_name): @@ -1214,13 +1306,11 @@ def assert_symbol_name(provenance, symbol_name, purpose, *, allow_dashes=True): # # Returns: # (list): A path from `node` to `target` or None if `target` is not in the subtree -def node_find_target(node, target, *, key=None): - assert type(node) is Node - assert type(target) is Node +cpdef list node_find_target(Node node, Node target, str key=None): if key is not None: - target = target[0][key] + target = target.value[key] - path = [] + cdef list path = [] if _walk_find_target(node, path, target): if key: # Remove key from end of path @@ -1230,19 +1320,22 @@ def node_find_target(node, target, *, key=None): # Helper for node_find_target() which walks a value -def _walk_find_target(node, path, target): - if node[1:] == target[1:]: +cdef bint _walk_find_target(Node node, list path, Node target): + if node.file_index == target.file_index and node.line == target.line and node.column == target.column: return True - elif type(node[0]) is dict: + elif type(node.value) is dict: return _walk_dict_node(node, path, target) - elif type(node[0]) is list: + elif type(node.value) is list: return _walk_list_node(node, path, target) return False # Helper for node_find_target() which walks a list -def _walk_list_node(node, path, target): - for i, v in enumerate(node[0]): +cdef bint _walk_list_node(Node node, list path, Node target): + cdef int i + cdef Node v + + for i, v in enumerate(node.value): path.append(i) if _walk_find_target(v, path, target): return True @@ -1251,8 +1344,11 @@ def _walk_list_node(node, path, target): # Helper for node_find_target() which walks a mapping -def _walk_dict_node(node, path, target): - for k, v in node[0].items(): +cdef bint _walk_dict_node(Node node, list path, Node target): + cdef str k + cdef Node v + + for k, v in node.value.items(): path.append(k) if _walk_find_target(v, path, target): return True diff --git a/src/buildstream/element.py b/src/buildstream/element.py index 08326c6f3..8e006ea6b 100644 --- a/src/buildstream/element.py +++ b/src/buildstream/element.py @@ -931,7 +931,8 @@ class Element(Plugin): (str): The resolved value for *varname*, or None if no variable was declared with the given name. """ - return self.__variables.flat.get(varname) + # Flat is not recognized correctly by Pylint as being a dictionary + return self.__variables.flat.get(varname) # pylint: disable=no-member def batch_prepare_assemble(self, flags, *, collect=None): """ Configure command batching across prepare() and assemble() diff --git a/tests/internals/yaml.py b/tests/internals/yaml.py index dae20b326..6b26fdfaf 100644 --- a/tests/internals/yaml.py +++ b/tests/internals/yaml.py @@ -22,7 +22,7 @@ def test_load_yaml(datafiles): 'basics.yaml') loaded = _yaml.load(filename) - assert loaded[0].get('kind')[0] == 'pony' + assert loaded.value.get('kind').value == 'pony' def assert_provenance(filename, line, col, node, key=None, indices=None): @@ -43,7 +43,7 @@ def test_basic_provenance(datafiles): 'basics.yaml') loaded = _yaml.load(filename) - assert loaded[0].get('kind')[0] == 'pony' + assert loaded.value.get('kind').value == 'pony' assert_provenance(filename, 1, 0, loaded) @@ -56,7 +56,7 @@ def test_member_provenance(datafiles): 'basics.yaml') loaded = _yaml.load(filename) - assert loaded[0].get('kind')[0] == 'pony' + assert loaded.value.get('kind').value == 'pony' assert_provenance(filename, 2, 13, loaded, 'description') @@ -68,7 +68,7 @@ def test_element_provenance(datafiles): 'basics.yaml') loaded = _yaml.load(filename) - assert loaded[0].get('kind')[0] == 'pony' + assert loaded.value.get('kind').value == 'pony' assert_provenance(filename, 5, 2, loaded, 'moods', [1]) @@ -102,7 +102,7 @@ def test_node_get(datafiles): 'basics.yaml') base = _yaml.load(filename) - assert base[0].get('kind')[0] == 'pony' + assert base.value.get('kind').value == 'pony' children = _yaml.node_get(base, list, 'children') assert isinstance(children, list) @@ -522,9 +522,9 @@ def test_node_find_target(datafiles, case): # laid out. Client code should never do this. def _walk(node, entry, rest): if rest: - return _walk(node[0][entry], rest[0], rest[1:]) + return _walk(node.value[entry], rest[0], rest[1:]) else: - return node[0][entry] + return node.value[entry] want = _walk(loaded, case[0], case[1:]) found_path = _yaml.node_find_target(toplevel, want) @@ -4,6 +4,7 @@ [tox] envlist = py{35,36,37} skip_missing_interpreters = true +isolated_build = true # # Defaults for all environments @@ -11,6 +12,10 @@ skip_missing_interpreters = true # Anything specified here is inherited by the sections # [testenv] +usedevelop = + # This is required by Cython in order to get coverage for cython files. + py{35,36,37}-!nocover: True + commands = # Running with coverage reporting enabled py{35,36,37}-!external-!nocover: pytest --basetemp {envtmpdir} --cov=buildstream --cov-config .coveragerc {posargs} @@ -51,6 +56,8 @@ setenv = py{35,36,37}: XDG_CACHE_HOME = {envtmpdir}/cache py{35,36,37}: XDG_CONFIG_HOME = {envtmpdir}/config py{35,36,37}: XDG_DATA_HOME = {envtmpdir}/share + # This is required to get coverage for Cython + py{35,36,37}-!nocover: BST_CYTHON_TRACE = 1 whitelist_externals = py{35,36,37}: @@ -61,7 +68,9 @@ whitelist_externals = # Coverage reporting # [testenv:coverage] -skip_install = true +# This is required by Cython in order to get coverage for cython files. +usedevelop = True + commands = coverage combine --rcfile={toxinidir}/.coveragerc {toxinidir}/.coverage-reports/ coverage html --rcfile={toxinidir}/.coveragerc --directory={toxinidir}/.coverage-reports/ @@ -75,6 +84,10 @@ setenv = # Running linters # [testenv:lint] +commands_pre = + # Build C extensions to allow Pylint to analyse them + {envpython} setup.py build_ext --inplace + commands = pycodestyle pylint src/buildstream tests @@ -147,3 +160,8 @@ deps = -rrequirements/requirements.txt -rrequirements/dev-requirements.txt -rrequirements/plugin-requirements.txt + +# When building using PEP518 and 517, we don't want default dependencies +# installed by the base environment. +[testenv:.package] +deps = |