Diffstat (limited to 'lib')
-rw-r--r-- | lib/ansible/cli/galaxy.py                                   |  254
-rw-r--r-- | lib/ansible/galaxy/api.py                                   |   44
-rw-r--r-- | lib/ansible/galaxy/collection/__init__.py                   | 1630
-rw-r--r-- | lib/ansible/galaxy/collection/concrete_artifact_manager.py  |  646
-rw-r--r-- | lib/ansible/galaxy/collection/galaxy_api_proxy.py           |  107
-rw-r--r-- | lib/ansible/galaxy/dependency_resolution/__init__.py        |   44
-rw-r--r-- | lib/ansible/galaxy/dependency_resolution/dataclasses.py     |  435
-rw-r--r-- | lib/ansible/galaxy/dependency_resolution/errors.py          |   11
-rw-r--r-- | lib/ansible/galaxy/dependency_resolution/providers.py       |  273
-rw-r--r-- | lib/ansible/galaxy/dependency_resolution/reporters.py       |   17
-rw-r--r-- | lib/ansible/galaxy/dependency_resolution/resolvers.py       |   17
-rw-r--r-- | lib/ansible/galaxy/dependency_resolution/versioning.py      |    5
12 files changed, 2465 insertions, 1018 deletions
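
Most of the insertions land in the new ansible.galaxy.dependency_resolution package, which re-implements dependency handling on top of the external resolvelib library (the Resolver/provider split surfaces below in install_collections and _resolve_depenency_map). As a reading aid, here is a minimal, self-contained sketch of that flow against a toy in-memory index. The INDEX data and the Coll/ToyProvider names are illustrative only; the resolvelib class and method names follow its 0.5-series API, which this changeset appears to target (an assumption -- provider signatures changed in later resolvelib releases).

from resolvelib import AbstractProvider, BaseReporter, Resolver

# Hypothetical static "galaxy": fqcn -> {version: {dep_fqcn: dep_version}}
INDEX = {
    'ns.app': {'1.0.0': {'ns.lib': '1.0.0'}, '1.1.0': {'ns.lib': '2.0.0'}},
    'ns.lib': {'1.0.0': {}, '2.0.0': {}},
}


class Coll(object):
    """Toy stand-in for both the Requirement and Candidate dataclasses."""
    def __init__(self, fqcn, ver):
        self.fqcn, self.ver = fqcn, ver


class ToyProvider(AbstractProvider):
    def identify(self, requirement_or_candidate):
        return requirement_or_candidate.fqcn

    def get_preference(self, resolution, candidates, information):
        return len(candidates)  # resolve the most constrained name first

    def find_matches(self, requirements):
        # All requirements passed in share one identifier; return the
        # candidates satisfying every one of them, best version first
        # (lexicographic sort is good enough for this toy index).
        fqcn = requirements[0].fqcn
        return [
            Coll(fqcn, ver)
            for ver in sorted(INDEX.get(fqcn, {}), reverse=True)
            if all(req.ver in ('*', ver) for req in requirements)
        ]

    def is_satisfied_by(self, requirement, candidate):
        return requirement.ver in ('*', candidate.ver)

    def get_dependencies(self, candidate):
        deps = INDEX[candidate.fqcn][candidate.ver]
        return [Coll(dep_fqcn, dep_ver) for dep_fqcn, dep_ver in deps.items()]


result = Resolver(ToyProvider(), BaseReporter()).resolve([Coll('ns.app', '*')])
print({fqcn: cand.ver for fqcn, cand in result.mapping.items()})
# -> {'ns.app': '1.1.0', 'ns.lib': '2.0.0'}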
diff --git a/lib/ansible/cli/galaxy.py b/lib/ansible/cli/galaxy.py index 3daeab1b13..154a6731a1 100644 --- a/lib/ansible/cli/galaxy.py +++ b/lib/ansible/cli/galaxy.py @@ -1,5 +1,5 @@ # Copyright: (c) 2013, James Cammarata <jcammarata@ansible.com> -# Copyright: (c) 2018, Ansible Project +# Copyright: (c) 2018-2021, Ansible Project # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) from __future__ import (absolute_import, division, print_function) @@ -24,7 +24,6 @@ from ansible.galaxy import Galaxy, get_collections_galaxy_meta_info from ansible.galaxy.api import GalaxyAPI from ansible.galaxy.collection import ( build_collection, - CollectionRequirement, download_collections, find_existing_collections, install_collections, @@ -33,6 +32,10 @@ from ansible.galaxy.collection import ( validate_collection_path, verify_collections ) +from ansible.galaxy.collection.concrete_artifact_manager import ( + ConcreteArtifactsManager, +) +from ansible.galaxy.dependency_resolution.dataclasses import Requirement from ansible.galaxy.role import GalaxyRole from ansible.galaxy.token import BasicAuthToken, GalaxyToken, KeycloakToken, NoTokenSentinel @@ -52,6 +55,26 @@ display = Display() urlparse = six.moves.urllib.parse.urlparse +def with_collection_artifacts_manager(wrapped_method): + """Inject an artifacts manager if not passed explicitly. + + This decorator constructs a ConcreteArtifactsManager and maintains + the related temporary directory auto-cleanup around the target + method invocation. + """ + def method_wrapper(*args, **kwargs): + if 'artifacts_manager' in kwargs: + return wrapped_method(*args, **kwargs) + + with ConcreteArtifactsManager.under_tmpdir( + C.DEFAULT_LOCAL_TMP, + validate_certs=not context.CLIARGS['ignore_certs'], + ) as concrete_artifact_cm: + kwargs['artifacts_manager'] = concrete_artifact_cm + return wrapped_method(*args, **kwargs) + return method_wrapper + + def _display_header(path, h1, h2, w1=10, w2=7): display.display('\n# {0}\n{1:{cwidth}} {2:{vwidth}}\n{3} {4}\n'.format( path, @@ -76,20 +99,19 @@ def _display_role(gr): def _display_collection(collection, cwidth=10, vwidth=7, min_cwidth=10, min_vwidth=7): display.display('{fqcn:{cwidth}} {version:{vwidth}}'.format( - fqcn=to_text(collection), - version=collection.latest_version, + fqcn=to_text(collection.fqcn), + version=collection.ver, cwidth=max(cwidth, min_cwidth), # Make sure the width isn't smaller than the header vwidth=max(vwidth, min_vwidth) )) def _get_collection_widths(collections): - if is_iterable(collections): - fqcn_set = set(to_text(c) for c in collections) - version_set = set(to_text(c.latest_version) for c in collections) - else: - fqcn_set = set([to_text(collections)]) - version_set = set([collections.latest_version]) + if not is_iterable(collections): + collections = (collections, ) + + fqcn_set = {to_text(c.fqcn) for c in collections} + version_set = {to_text(c.ver) for c in collections} fqcn_length = len(max(fqcn_set, key=len)) version_length = len(max(version_set, key=len)) @@ -447,7 +469,7 @@ class GalaxyCLI(CLI): # Need to filter out empty strings or non truthy values as an empty server list env var is equal to ['']. server_list = [s for s in C.GALAXY_SERVER_LIST or [] if s] - for server_key in server_list: + for server_priority, server_key in enumerate(server_list, start=1): # Config definitions are looked up dynamically based on the C.GALAXY_SERVER_LIST entry. We look up the # section [galaxy_server.<server>] for the values url, username, password, and token. 
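
The with_collection_artifacts_manager decorator above defers to ConcreteArtifactsManager.under_tmpdir, whose definition lives in concrete_artifact_manager.py, outside the hunks shown here. A plausible simplified sketch of that classmethod, assuming it is a context manager that owns a throwaway working directory -- not the actual implementation:

import shutil
import tempfile
from contextlib import contextmanager


class ConcreteArtifactsManagerSketch(object):
    def __init__(self, b_working_directory, validate_certs=True):
        self._b_working_directory = b_working_directory
        self._validate_certs = validate_certs

    @classmethod
    @contextmanager
    def under_tmpdir(cls, temp_dir_base, validate_certs=True):
        # Create a scratch directory for downloaded/extracted artifacts,
        # yield a manager bound to it, and guarantee cleanup on exit --
        # which is exactly what the decorator wraps around each CLI call.
        b_temp_path = tempfile.mkdtemp(dir=temp_dir_base)
        try:
            yield cls(b_temp_path, validate_certs=validate_certs)
        finally:
            shutil.rmtree(b_temp_path)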
config_dict = dict((k, server_config_def(server_key, k, req)) for k, req in server_def) @@ -486,7 +508,11 @@ class GalaxyCLI(CLI): server_options['token'] = GalaxyToken(token=token_val) server_options.update(galaxy_options) - config_servers.append(GalaxyAPI(self.galaxy, server_key, **server_options)) + config_servers.append(GalaxyAPI( + self.galaxy, server_key, + priority=server_priority, + **server_options + )) cmd_server = context.CLIARGS['api_server'] cmd_token = GalaxyToken(token=context.CLIARGS['api_key']) @@ -497,15 +523,21 @@ class GalaxyCLI(CLI): if config_server: self.api_servers.append(config_server) else: - self.api_servers.append(GalaxyAPI(self.galaxy, 'cmd_arg', cmd_server, token=cmd_token, - **galaxy_options)) + self.api_servers.append(GalaxyAPI( + self.galaxy, 'cmd_arg', cmd_server, token=cmd_token, + priority=len(config_servers) + 1, + **galaxy_options + )) else: self.api_servers = config_servers # Default to C.GALAXY_SERVER if no servers were defined if len(self.api_servers) == 0: - self.api_servers.append(GalaxyAPI(self.galaxy, 'default', C.GALAXY_SERVER, token=cmd_token, - **galaxy_options)) + self.api_servers.append(GalaxyAPI( + self.galaxy, 'default', C.GALAXY_SERVER, token=cmd_token, + priority=0, + **galaxy_options + )) context.CLIARGS['func']() @@ -530,7 +562,7 @@ class GalaxyCLI(CLI): def _get_default_collection_path(self): return C.COLLECTIONS_PATHS[0] - def _parse_requirements_file(self, requirements_file, allow_old_format=True): + def _parse_requirements_file(self, requirements_file, allow_old_format=True, artifacts_manager=None): """ Parses an Ansible requirement.yml file and returns all the roles and/or collections defined in it. There are 2 requirements file format: @@ -556,6 +588,7 @@ class GalaxyCLI(CLI): :param requirements_file: The path to the requirements file. :param allow_old_format: Will fail if a v1 requirements file is found and this is set to False. + :param artifacts_manager: Artifacts manager. :return: a dict containing roles and collections to found in the requirements file. """ requirements = { @@ -619,33 +652,48 @@ class GalaxyCLI(CLI): for role_req in file_requirements.get('roles') or []: requirements['roles'] += parse_role_req(role_req) - for collection_req in file_requirements.get('collections') or []: - if isinstance(collection_req, dict): - req_name = collection_req.get('name', None) - if req_name is None: - raise AnsibleError("Collections requirement entry should contain the key name.") - - req_type = collection_req.get('type') - if req_type not in ('file', 'galaxy', 'git', 'url', None): - raise AnsibleError("The collection requirement entry key 'type' must be one of file, galaxy, git, or url.") - - req_version = collection_req.get('version', '*') - req_source = collection_req.get('source', None) - if req_source: - # Try and match up the requirement source with our list of Galaxy API servers defined in the - # config, otherwise create a server with that URL without any auth. 
- req_source = next(iter([a for a in self.api_servers if req_source in [a.name, a.api_server]]), - GalaxyAPI(self.galaxy, - "explicit_requirement_%s" % req_name, - req_source, - validate_certs=not context.CLIARGS['ignore_certs'])) - - requirements['collections'].append((req_name, req_version, req_source, req_type)) - else: - requirements['collections'].append((collection_req, '*', None, None)) + requirements['collections'] = [ + Requirement.from_requirement_dict( + self._init_coll_req_dict(collection_req), + artifacts_manager, + ) + for collection_req in file_requirements.get('collections') or [] + ] return requirements + def _init_coll_req_dict(self, coll_req): + if not isinstance(coll_req, dict): + # Assume it's a string: + return {'name': coll_req} + + if ( + 'name' not in coll_req or + not coll_req.get('source') or + coll_req.get('type', 'galaxy') != 'galaxy' + ): + return coll_req + + # Try and match up the requirement source with our list of Galaxy API + # servers defined in the config, otherwise create a server with that + # URL without any auth. + coll_req['source'] = next( + iter( + srvr for srvr in self.api_servers + if coll_req['source'] in {srvr.name, srvr.api_server} + ), + GalaxyAPI( + self.galaxy, + 'explicit_requirement_{name!s}'.format( + name=coll_req['name'], + ), + coll_req['source'], + validate_certs=not context.CLIARGS['ignore_certs'], + ), + ) + + return coll_req + @staticmethod def exit_without_ignore(rc=1): """ @@ -733,26 +781,29 @@ class GalaxyCLI(CLI): return meta_value - def _require_one_of_collections_requirements(self, collections, requirements_file): + def _require_one_of_collections_requirements( + self, collections, requirements_file, + artifacts_manager=None, + ): if collections and requirements_file: raise AnsibleError("The positional collection_name arg and --requirements-file are mutually exclusive.") elif not collections and not requirements_file: raise AnsibleError("You must specify a collection name or a requirements file.") elif requirements_file: requirements_file = GalaxyCLI._resolve_path(requirements_file) - requirements = self._parse_requirements_file(requirements_file, allow_old_format=False) + requirements = self._parse_requirements_file( + requirements_file, + allow_old_format=False, + artifacts_manager=artifacts_manager, + ) else: - requirements = {'collections': [], 'roles': []} - for collection_input in collections: - requirement = None - if os.path.isfile(to_bytes(collection_input, errors='surrogate_or_strict')) or \ - urlparse(collection_input).scheme.lower() in ['http', 'https'] or \ - collection_input.startswith(('git+', 'git@')): - # Arg is a file path or URL to a collection - name = collection_input - else: - name, dummy, requirement = collection_input.partition(':') - requirements['collections'].append((name, requirement or '*', None, None)) + requirements = { + 'collections': [ + Requirement.from_string(coll_input, artifacts_manager) + for coll_input in collections + ], + 'roles': [], + } return requirements ############################ @@ -792,27 +843,37 @@ class GalaxyCLI(CLI): for collection_path in context.CLIARGS['args']: collection_path = GalaxyCLI._resolve_path(collection_path) - build_collection(collection_path, output_path, force) + build_collection( + to_text(collection_path, errors='surrogate_or_strict'), + to_text(output_path, errors='surrogate_or_strict'), + force, + ) - def execute_download(self): + @with_collection_artifacts_manager + def execute_download(self, artifacts_manager=None): collections = 
context.CLIARGS['args'] no_deps = context.CLIARGS['no_deps'] download_path = context.CLIARGS['download_path'] - ignore_certs = context.CLIARGS['ignore_certs'] requirements_file = context.CLIARGS['requirements'] if requirements_file: requirements_file = GalaxyCLI._resolve_path(requirements_file) - requirements = self._require_one_of_collections_requirements(collections, requirements_file)['collections'] + requirements = self._require_one_of_collections_requirements( + collections, requirements_file, + artifacts_manager=artifacts_manager, + )['collections'] download_path = GalaxyCLI._resolve_path(download_path) b_download_path = to_bytes(download_path, errors='surrogate_or_strict') if not os.path.exists(b_download_path): os.makedirs(b_download_path) - download_collections(requirements, download_path, self.api_servers, (not ignore_certs), no_deps, - context.CLIARGS['allow_pre_release']) + download_collections( + requirements, download_path, self.api_servers, no_deps, + context.CLIARGS['allow_pre_release'], + artifacts_manager=artifacts_manager, + ) return 0 @@ -1002,29 +1063,38 @@ class GalaxyCLI(CLI): self.pager(data) - def execute_verify(self): + @with_collection_artifacts_manager + def execute_verify(self, artifacts_manager=None): collections = context.CLIARGS['args'] search_paths = context.CLIARGS['collections_path'] - ignore_certs = context.CLIARGS['ignore_certs'] ignore_errors = context.CLIARGS['ignore_errors'] requirements_file = context.CLIARGS['requirements'] - requirements = self._require_one_of_collections_requirements(collections, requirements_file)['collections'] + requirements = self._require_one_of_collections_requirements( + collections, requirements_file, + artifacts_manager=artifacts_manager, + )['collections'] resolved_paths = [validate_collection_path(GalaxyCLI._resolve_path(path)) for path in search_paths] - verify_collections(requirements, resolved_paths, self.api_servers, (not ignore_certs), ignore_errors, - allow_pre_release=True) + verify_collections( + requirements, resolved_paths, + self.api_servers, ignore_errors, + artifacts_manager=artifacts_manager, + ) return 0 - def execute_install(self): + @with_collection_artifacts_manager + def execute_install(self, artifacts_manager=None): """ Install one or more roles(``ansible-galaxy role install``), or one or more collections(``ansible-galaxy collection install``). You can pass in a list (roles or collections) or use the file option listed below (these are mutually exclusive). If you pass in a list, it can be a name (which will be downloaded via the galaxy API and github), or it can be a local tar archive file. + + :param artifacts_manager: Artifacts manager. 
""" install_items = context.CLIARGS['args'] requirements_file = context.CLIARGS['requirements'] @@ -1042,7 +1112,10 @@ class GalaxyCLI(CLI): role_requirements = [] if context.CLIARGS['type'] == 'collection': collection_path = GalaxyCLI._resolve_path(context.CLIARGS['collections_path']) - requirements = self._require_one_of_collections_requirements(install_items, requirements_file) + requirements = self._require_one_of_collections_requirements( + install_items, requirements_file, + artifacts_manager=artifacts_manager, + ) collection_requirements = requirements['collections'] if requirements['roles']: @@ -1055,7 +1128,10 @@ class GalaxyCLI(CLI): if not (requirements_file.endswith('.yaml') or requirements_file.endswith('.yml')): raise AnsibleError("Invalid role requirements file, it must end with a .yml or .yaml extension") - requirements = self._parse_requirements_file(requirements_file) + requirements = self._parse_requirements_file( + requirements_file, + artifacts_manager=artifacts_manager, + ) role_requirements = requirements['roles'] # We can only install collections and roles at the same time if the type wasn't specified and the -p @@ -1090,11 +1166,15 @@ class GalaxyCLI(CLI): display.display("Starting galaxy collection install process") # Collections can technically be installed even when ansible-galaxy is in role mode so we need to pass in # the install path as context.CLIARGS['collections_path'] won't be set (default is calculated above). - self._execute_install_collection(collection_requirements, collection_path) + self._execute_install_collection( + collection_requirements, collection_path, + artifacts_manager=artifacts_manager, + ) - def _execute_install_collection(self, requirements, path): + def _execute_install_collection( + self, requirements, path, artifacts_manager, + ): force = context.CLIARGS['force'] - ignore_certs = context.CLIARGS['ignore_certs'] ignore_errors = context.CLIARGS['ignore_errors'] no_deps = context.CLIARGS['no_deps'] force_with_deps = context.CLIARGS['force_with_deps'] @@ -1111,8 +1191,12 @@ class GalaxyCLI(CLI): if not os.path.exists(b_output_path): os.makedirs(b_output_path) - install_collections(requirements, output_path, self.api_servers, (not ignore_certs), ignore_errors, - no_deps, force, force_with_deps, allow_pre_release=allow_pre_release) + install_collections( + requirements, output_path, self.api_servers, ignore_errors, + no_deps, force, force_with_deps, + allow_pre_release=allow_pre_release, + artifacts_manager=artifacts_manager, + ) return 0 @@ -1283,9 +1367,12 @@ class GalaxyCLI(CLI): return 0 - def execute_list_collection(self): + @with_collection_artifacts_manager + def execute_list_collection(self, artifacts_manager=None): """ List all collections installed on the local system + + :param artifacts_manager: Artifacts manager. 
""" collections_search_paths = set(context.CLIARGS['collections_path']) @@ -1328,8 +1415,16 @@ class GalaxyCLI(CLI): continue collection_found = True - collection = CollectionRequirement.from_path(b_collection_path, False, fallback_metadata=True) - fqcn_width, version_width = _get_collection_widths(collection) + + try: + collection = Requirement.from_dir_path_as_unknown( + b_collection_path, + artifacts_manager, + ) + except ValueError as val_err: + six.raise_from(AnsibleError(val_err), val_err) + + fqcn_width, version_width = _get_collection_widths([collection]) _display_header(collection_path, 'Collection', 'Version', fqcn_width, version_width) _display_collection(collection, fqcn_width, version_width) @@ -1339,7 +1434,9 @@ class GalaxyCLI(CLI): collection_path = validate_collection_path(path) if os.path.isdir(collection_path): display.vvv("Searching {0} for collections".format(collection_path)) - collections = find_existing_collections(collection_path, fallback_metadata=True) + collections = list(find_existing_collections( + collection_path, artifacts_manager, + )) else: # There was no 'ansible_collections/' directory in the path, so there # or no collections here. @@ -1355,8 +1452,7 @@ class GalaxyCLI(CLI): _display_header(collection_path, 'Collection', 'Version', fqcn_width, version_width) # Sort collections by the namespace and name - collections.sort(key=to_text) - for collection in collections: + for collection in sorted(collections, key=to_text): _display_collection(collection, fqcn_width, version_width) # Do not warn if the specific collection was found in any of the search paths diff --git a/lib/ansible/galaxy/api.py b/lib/ansible/galaxy/api.py index 2ed4930b25..de5d6cc305 100644 --- a/lib/ansible/galaxy/api.py +++ b/lib/ansible/galaxy/api.py @@ -7,6 +7,7 @@ __metaclass__ = type import collections import datetime +import functools import hashlib import json import os @@ -233,11 +234,17 @@ class CollectionVersionMetadata: self.dependencies = dependencies +@functools.total_ordering class GalaxyAPI: """ This class is meant to be used as a API client for an Ansible Galaxy server """ - def __init__(self, galaxy, name, url, username=None, password=None, token=None, validate_certs=True, - available_api_versions=None, clear_response_cache=False, no_cache=True): + def __init__( + self, galaxy, name, url, + username=None, password=None, token=None, validate_certs=True, + available_api_versions=None, + clear_response_cache=False, no_cache=True, + priority=float('inf'), + ): self.galaxy = galaxy self.name = name self.username = username @@ -246,6 +253,7 @@ class GalaxyAPI: self.api_server = url self.validate_certs = validate_certs self._available_api_versions = available_api_versions or {} + self._priority = priority b_cache_dir = to_bytes(C.config.get_config_value('GALAXY_CACHE_DIR'), errors='surrogate_or_strict') makedirs_safe(b_cache_dir, mode=0o700) @@ -263,6 +271,38 @@ class GalaxyAPI: display.debug('Validate TLS certificates for %s: %s' % (self.api_server, self.validate_certs)) + def __str__(self): + # type: (GalaxyAPI) -> str + """Render GalaxyAPI as a native string representation.""" + return to_native(self.name) + + def __unicode__(self): + # type: (GalaxyAPI) -> unicode + """Render GalaxyAPI as a unicode/text string representation.""" + return to_text(self.name) + + def __repr__(self): + # type: (GalaxyAPI) -> str + """Render GalaxyAPI as an inspectable string representation.""" + return ( + '<{instance!s} "{name!s}" @ {url!s} with priority {priority!s}>'. 
+ format( + instance=self, name=self.name, + priority=self._priority, url=self.api_server, + ) + ) + + def __lt__(self, other_galaxy_api): + # type: (GalaxyAPI, GalaxyAPI) -> Union[bool, 'NotImplemented'] + """Return whether the instance priority is higher than other.""" + if not isinstance(other_galaxy_api, self.__class__): + return NotImplemented + + return ( + self._priority > other_galaxy_api._priority or + self.name < self.name + ) + @property @g_connect(['v1', 'v2', 'v3']) def available_api_versions(self): diff --git a/lib/ansible/galaxy/collection/__init__.py b/lib/ansible/galaxy/collection/__init__.py index 0d7cfc3f78..3f50361c5f 100644 --- a/lib/ansible/galaxy/collection/__init__.py +++ b/lib/ansible/galaxy/collection/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright: (c) 2019, Ansible Project +# Copyright: (c) 2019-2021, Ansible Project # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) """Installed collections management package.""" @@ -9,7 +9,6 @@ __metaclass__ = type import errno import fnmatch import json -import operator import os import shutil import stat @@ -25,30 +24,102 @@ from contextlib import contextmanager from distutils.version import LooseVersion from hashlib import sha256 from io import BytesIO +from itertools import chain +from resolvelib.resolvers import InconsistentCandidate from yaml.error import YAMLError +# NOTE: Adding type ignores is a hack for mypy to shut up wrt bug #1153 try: - import queue + import queue # type: ignore[import] +except ImportError: # Python 2 + import Queue as queue # type: ignore[import,no-redef] + +try: + # NOTE: It's in Python 3 stdlib and can be installed on Python 2 + # NOTE: via `pip install typing`. Unnecessary in runtime. + # NOTE: `TYPE_CHECKING` is True during mypy-typecheck-time. 
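
Two details of the __lt__ hunk above deserve attention: functools.total_ordering derives >, <=, and >= from __lt__ alone, and the tie-breaker `self.name < self.name` compares a name with itself, so it is always False and equal-priority servers simply keep their relative order under Python's stable sort. A standalone toy model follows, with the name tie-break written the way it was presumably intended -- applied only when priorities are equal, since an unconditional `or` would make the ordering inconsistent across mixed priorities:

import functools


@functools.total_ordering
class Api(object):  # toy stand-in for GalaxyAPI
    def __init__(self, name, priority):
        self.name, self._priority = name, priority

    def __repr__(self):
        return '<Api {0} priority={1}>'.format(self.name, self._priority)

    def __lt__(self, other):
        if not isinstance(other, Api):
            return NotImplemented
        # A numerically *larger* priority compares as "less than" here,
        # so it sorts first under sorted(); names break exact ties.
        if self._priority != other._priority:
            return self._priority > other._priority
        return self.name < other.name


apis = [Api('default', 0), Api('community', 1), Api('hub', 2)]
print(sorted(apis))   # -> [<Api hub ...>, <Api community ...>, <Api default ...>]
print(Api('a', 1) >= Api('b', 1))  # False; __ge__ was derived by total_ordering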
+ from typing import TYPE_CHECKING except ImportError: - import Queue as queue # Python 2 + TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Dict, Iterable, List, Optional, Text, Union + if sys.version_info[:2] >= (3, 8): + from typing import Literal + else: # Python 2 + Python 3.4-3.7 + from typing_extensions import Literal + + from ansible.galaxy.api import GalaxyAPI + from ansible.galaxy.collection.concrete_artifact_manager import ( + ConcreteArtifactsManager, + ) + + ManifestKeysType = Literal[ + 'collection_info', 'file_manifest_file', 'format', + ] + FileMetaKeysType = Literal[ + 'name', + 'ftype', + 'chksum_type', + 'chksum_sha256', + 'format', + ] + CollectionInfoKeysType = Literal[ + # collection meta: + 'namespace', 'name', 'version', + 'authors', 'readme', + 'tags', 'description', + 'license', 'license_file', + 'dependencies', + 'repository', 'documentation', + 'homepage', 'issues', + + # files meta: + FileMetaKeysType, + ] + ManifestValueType = Dict[ + CollectionInfoKeysType, + Optional[ + Union[ + int, str, # scalars, like name/ns, schema version + List[str], # lists of scalars, like tags + Dict[str, str], # deps map + ], + ], + ] + CollectionManifestType = Dict[ManifestKeysType, ManifestValueType] + FileManifestEntryType = Dict[FileMetaKeysType, Optional[Union[str, int]]] + FilesManifestType = Dict[ + Literal['files', 'format'], + Union[List[FileManifestEntryType], int], + ] import ansible.constants as C from ansible.errors import AnsibleError from ansible.galaxy import get_collections_galaxy_meta_info -from ansible.galaxy.api import CollectionVersionMetadata, GalaxyError -from ansible.galaxy.user_agent import user_agent -from ansible.module_utils import six +from ansible.galaxy.collection.concrete_artifact_manager import ( + _consume_file, + _download_file, + _get_meta_from_src_dir, + _tarfile_extract, +) +from ansible.galaxy.collection.galaxy_api_proxy import MultiGalaxyAPIProxy +from ansible.galaxy.dependency_resolution import ( + build_collection_dependency_resolver, +) +from ansible.galaxy.dependency_resolution.dataclasses import ( + Candidate, Requirement, +) +from ansible.galaxy.dependency_resolution.errors import ( + CollectionDependencyResolutionImpossible, +) +from ansible.galaxy.dependency_resolution.versioning import meets_requirements +from ansible.module_utils.six import raise_from from ansible.module_utils._text import to_bytes, to_native, to_text from ansible.utils.collection_loader import AnsibleCollectionRef from ansible.utils.display import Display -from ansible.utils.galaxy import scm_archive_collection from ansible.utils.hashing import secure_hash, secure_hash_s from ansible.utils.version import SemanticVersion -from ansible.module_utils.urls import open_url - -urlparse = six.moves.urllib.parse.urlparse -urldefrag = six.moves.urllib.parse.urldefrag -urllib_error = six.moves.urllib.error display = Display() @@ -58,532 +129,145 @@ MANIFEST_FORMAT = 1 ModifiedContent = namedtuple('ModifiedContent', ['filename', 'expected', 'installed']) -class CollectionRequirement: - - _FILE_MAPPING = [(b'MANIFEST.json', 'manifest_file'), (b'FILES.json', 'files_file')] - - def __init__(self, namespace, name, b_path, api, versions, requirement, force, parent=None, metadata=None, - files=None, skip=False, allow_pre_releases=False): - """Represents a collection requirement, the versions that are available to be installed as well as any - dependencies the collection has. - - :param namespace: The collection namespace. - :param name: The collection name. 
- :param b_path: Byte str of the path to the collection tarball if it has already been downloaded. - :param api: The GalaxyAPI to use if the collection is from Galaxy. - :param versions: A list of versions of the collection that are available. - :param requirement: The version requirement string used to verify the list of versions fit the requirements. - :param force: Whether the force flag applied to the collection. - :param parent: The name of the parent the collection is a dependency of. - :param metadata: The galaxy.api.CollectionVersionMetadata that has already been retrieved from the Galaxy - server. - :param files: The files that exist inside the collection. This is based on the FILES.json file inside the - collection artifact. - :param skip: Whether to skip installing the collection. Should be set if the collection is already installed - and force is not set. - :param allow_pre_releases: Whether to skip pre-release versions of collections. - """ - self.namespace = namespace - self.name = name - self.b_path = b_path - self.api = api - self._versions = set(versions) - self.force = force - self.skip = skip - self.required_by = [] - self.allow_pre_releases = allow_pre_releases - - self._metadata = metadata - self._files = files - - self.add_requirement(parent, requirement) - - def __str__(self): - return to_native("%s.%s" % (self.namespace, self.name)) - - def __unicode__(self): - return u"%s.%s" % (self.namespace, self.name) - - @property - def metadata(self): - self._get_metadata() - return self._metadata - - @property - def versions(self): - if self.allow_pre_releases: - return self._versions - return set(v for v in self._versions if v == '*' or not SemanticVersion(v).is_prerelease) - - @versions.setter - def versions(self, value): - self._versions = set(value) - - @property - def pre_releases(self): - return set(v for v in self._versions if SemanticVersion(v).is_prerelease) - - @property - def latest_version(self): - try: - return max([v for v in self.versions if v != '*'], key=SemanticVersion) - except ValueError: # ValueError: max() arg is an empty sequence - return '*' - - @property - def dependencies(self): - if not self._metadata: - if len(self.versions) > 1: - return {} - self._get_metadata() - - dependencies = self._metadata.dependencies - - if dependencies is None: - return {} - - return dependencies - - @staticmethod - def artifact_info(b_path): - """Load the manifest data from the MANIFEST.json and FILES.json. If the files exist, return a dict containing the keys 'files_file' and 'manifest_file'. - :param b_path: The directory of a collection. - """ - info = {} - for b_file_name, property_name in CollectionRequirement._FILE_MAPPING: - b_file_path = os.path.join(b_path, b_file_name) - if not os.path.exists(b_file_path): - continue - with open(b_file_path, 'rb') as file_obj: - try: - info[property_name] = json.loads(to_text(file_obj.read(), errors='surrogate_or_strict')) - except ValueError: - raise AnsibleError("Collection file at '%s' does not contain a valid json string." % to_native(b_file_path)) - return info - - @staticmethod - def galaxy_metadata(b_path): - """Generate the manifest data from the galaxy.yml file. - If the galaxy.yml exists, return a dictionary containing the keys 'files_file' and 'manifest_file'. - - :param b_path: The directory of a collection. 
- """ - b_galaxy_path = get_galaxy_metadata_path(b_path) - info = {} - if os.path.exists(b_galaxy_path): - collection_meta = _get_galaxy_yml(b_galaxy_path) - info['files_file'] = _build_files_manifest(b_path, collection_meta['namespace'], collection_meta['name'], collection_meta['build_ignore']) - info['manifest_file'] = _build_manifest(**collection_meta) - return info - - @staticmethod - def collection_info(b_path, fallback_metadata=False): - info = CollectionRequirement.artifact_info(b_path) - if info or not fallback_metadata: - return info - return CollectionRequirement.galaxy_metadata(b_path) - - def add_requirement(self, parent, requirement): - self.required_by.append((parent, requirement)) - new_versions = set(v for v in self.versions if self._meets_requirements(v, requirement, parent)) - if len(new_versions) == 0: - if self.skip: - force_flag = '--force-with-deps' if parent else '--force' - version = self.latest_version if self.latest_version != '*' else 'unknown' - msg = "Cannot meet requirement %s:%s as it is already installed at version '%s'. Use %s to overwrite" \ - % (to_text(self), requirement, version, force_flag) - raise AnsibleError(msg) - elif parent is None: - msg = "Cannot meet requirement %s for dependency %s" % (requirement, to_text(self)) - else: - msg = "Cannot meet dependency requirement '%s:%s' for collection %s" \ - % (to_text(self), requirement, parent) +def verify_local_collection( + local_collection, remote_collection, + artifacts_manager, +): # type: (Candidate, Candidate, ConcreteArtifactsManager) -> None + """Verify integrity of the locally installed collection. - collection_source = to_text(self.b_path, nonstring='passthru') or self.api.api_server - req_by = "\n".join( - "\t%s - '%s:%s'" % (to_text(p) if p else 'base', to_text(self), r) - for p, r in self.required_by - ) + :param local_collection: Collection being checked. + :param remote_collection: Correct collection. + :param artifacts_manager: Artifacts manager. + """ + b_temp_tar_path = ( # NOTE: AnsibleError is raised on URLError + artifacts_manager.get_artifact_path + if remote_collection.is_concrete_artifact + else artifacts_manager.get_galaxy_artifact_path + )(remote_collection) + + b_collection_path = to_bytes( + local_collection.src, errors='surrogate_or_strict', + ) - versions = ", ".join(sorted(self.versions, key=SemanticVersion)) - if not self.versions and self.pre_releases: - pre_release_msg = ( - '\nThis collection only contains pre-releases. Utilize `--pre` to install pre-releases, or ' - 'explicitly provide the pre-release version.' - ) - else: - pre_release_msg = '' + display.vvv("Verifying '{coll!s}'.".format(coll=local_collection)) + display.vvv( + u"Installed collection found at '{path!s}'". + format(path=to_text(local_collection.src)), + ) + display.vvv( + u"Remote collection cached as '{path!s}'". + format(path=to_text(b_temp_tar_path)), + ) - raise AnsibleError( - "%s from source '%s'. 
Available versions before last requirement added: %s\nRequirements from:\n%s%s" - % (msg, collection_source, versions, req_by, pre_release_msg) + # Compare installed version versus requirement version + if local_collection.ver != remote_collection.ver: + err = ( + "{local_fqcn!s} has the version '{local_ver!s}' but " + "is being compared to '{remote_ver!s}'".format( + local_fqcn=local_collection.fqcn, + local_ver=local_collection.ver, + remote_ver=remote_collection.ver, ) + ) + display.display(err) + return - self.versions = new_versions - - def download(self, b_path): - download_url = self._metadata.download_url - artifact_hash = self._metadata.artifact_sha256 - headers = {} - self.api._add_auth_token(headers, download_url, required=False) - - b_collection_path = _download_file(download_url, b_path, artifact_hash, self.api.validate_certs, - headers=headers) - - return to_text(b_collection_path, errors='surrogate_or_strict') - - def install(self, path, b_temp_path): - if self.skip: - display.display("Skipping '%s' as it is already installed" % to_text(self)) - return - - # Install if it is not - collection_path = os.path.join(path, self.namespace, self.name) - b_collection_path = to_bytes(collection_path, errors='surrogate_or_strict') - display.display("Installing '%s:%s' to '%s'" % (to_text(self), self.latest_version, collection_path)) - - if self.b_path is None: - self.b_path = self.download(b_temp_path) - - if os.path.exists(b_collection_path): - shutil.rmtree(b_collection_path) - - if os.path.isfile(self.b_path): - self.install_artifact(b_collection_path, b_temp_path) - else: - self.install_scm(b_collection_path) - - display.display("%s (%s) was installed successfully" % (to_text(self), self.latest_version)) - - def install_artifact(self, b_collection_path, b_temp_path): - - try: - with tarfile.open(self.b_path, mode='r') as collection_tar: - files_member_obj = collection_tar.getmember('FILES.json') - with _tarfile_extract(collection_tar, files_member_obj) as (dummy, files_obj): - files = json.loads(to_text(files_obj.read(), errors='surrogate_or_strict')) - - _extract_tar_file(collection_tar, 'MANIFEST.json', b_collection_path, b_temp_path) - _extract_tar_file(collection_tar, 'FILES.json', b_collection_path, b_temp_path) - - for file_info in files['files']: - file_name = file_info['name'] - if file_name == '.': - continue - - if file_info['ftype'] == 'file': - _extract_tar_file(collection_tar, file_name, b_collection_path, b_temp_path, - expected_hash=file_info['chksum_sha256']) - - else: - _extract_tar_dir(collection_tar, file_name, b_collection_path) - - except Exception: - # Ensure we don't leave the dir behind in case of a failure. - shutil.rmtree(b_collection_path) - - b_namespace_path = os.path.dirname(b_collection_path) - if not os.listdir(b_namespace_path): - os.rmdir(b_namespace_path) - - raise - - def install_scm(self, b_collection_output_path): - """Install the collection from source control into given dir. - - Generates the Ansible collection artifact data from a galaxy.yml and installs the artifact to a directory. - This should follow the same pattern as build_collection, but instead of creating an artifact, install it. - :param b_collection_output_path: The installation directory for the collection artifact. - :raises AnsibleError: If no collection metadata found. 
- """ - b_collection_path = self.b_path - - b_galaxy_path = get_galaxy_metadata_path(b_collection_path) - if not os.path.exists(b_galaxy_path): - raise AnsibleError("The collection galaxy.yml path '%s' does not exist." % to_native(b_galaxy_path)) - - info = CollectionRequirement.galaxy_metadata(b_collection_path) - - collection_manifest = info['manifest_file'] - collection_meta = collection_manifest['collection_info'] - file_manifest = info['files_file'] - - _build_collection_dir(b_collection_path, b_collection_output_path, collection_manifest, file_manifest) - - collection_name = "%s.%s" % (collection_manifest['collection_info']['namespace'], - collection_manifest['collection_info']['name']) - display.display('Created collection for %s at %s' % (collection_name, to_text(b_collection_output_path))) - - def set_latest_version(self): - self.versions = set([self.latest_version]) - self._get_metadata() - - def verify(self, remote_collection, path, b_temp_tar_path): - if not self.skip: - display.display("'%s' has not been installed, nothing to verify" % (to_text(self))) - return - - collection_path = os.path.join(path, self.namespace, self.name) - b_collection_path = to_bytes(collection_path, errors='surrogate_or_strict') - - display.vvv("Verifying '%s:%s'." % (to_text(self), self.latest_version)) - display.vvv("Installed collection found at '%s'" % collection_path) - display.vvv("Remote collection found at '%s'" % remote_collection.metadata.download_url) - - # Compare installed version versus requirement version - if self.latest_version != remote_collection.latest_version: - err = "%s has the version '%s' but is being compared to '%s'" % (to_text(self), self.latest_version, remote_collection.latest_version) - display.display(err) - return - - modified_content = [] - - # Verify the manifest hash matches before verifying the file manifest - expected_hash = _get_tar_file_hash(b_temp_tar_path, 'MANIFEST.json') - self._verify_file_hash(b_collection_path, 'MANIFEST.json', expected_hash, modified_content) - manifest = _get_json_from_tar_file(b_temp_tar_path, 'MANIFEST.json') - - # Use the manifest to verify the file manifest checksum - file_manifest_data = manifest['file_manifest_file'] - file_manifest_filename = file_manifest_data['name'] - expected_hash = file_manifest_data['chksum_%s' % file_manifest_data['chksum_type']] - - # Verify the file manifest before using it to verify individual files - self._verify_file_hash(b_collection_path, file_manifest_filename, expected_hash, modified_content) - file_manifest = _get_json_from_tar_file(b_temp_tar_path, file_manifest_filename) - - # Use the file manifest to verify individual file checksums - for manifest_data in file_manifest['files']: - if manifest_data['ftype'] == 'file': - expected_hash = manifest_data['chksum_%s' % manifest_data['chksum_type']] - self._verify_file_hash(b_collection_path, manifest_data['name'], expected_hash, modified_content) - - if modified_content: - display.display("Collection %s contains modified content in the following files:" % to_text(self)) - display.display(to_text(self)) - display.vvv(to_text(self.b_path)) - for content_change in modified_content: - display.display(' %s' % content_change.filename) - display.vvv(" Expected: %s\n Found: %s" % (content_change.expected, content_change.installed)) - else: - display.vvv("Successfully verified that checksums for '%s:%s' match the remote collection" % (to_text(self), self.latest_version)) - - def _verify_file_hash(self, b_path, filename, expected_hash, error_queue): - 
b_file_path = to_bytes(os.path.join(to_text(b_path), filename), errors='surrogate_or_strict') - - if not os.path.isfile(b_file_path): - actual_hash = None - else: - with open(b_file_path, mode='rb') as file_object: - actual_hash = _consume_file(file_object) - - if expected_hash != actual_hash: - error_queue.append(ModifiedContent(filename=filename, expected=expected_hash, installed=actual_hash)) - - def _get_metadata(self): - if self._metadata: - return - self._metadata = self.api.get_collection_version_metadata(self.namespace, self.name, self.latest_version) - - def _meets_requirements(self, version, requirements, parent): - """ - Supports version identifiers can be '==', '!=', '>', '>=', '<', '<=', '*'. Each requirement is delimited by ',' - """ - op_map = { - '!=': operator.ne, - '==': operator.eq, - '=': operator.eq, - '>=': operator.ge, - '>': operator.gt, - '<=': operator.le, - '<': operator.lt, - } - - for req in list(requirements.split(',')): - op_pos = 2 if len(req) > 1 and req[1] == '=' else 1 - op = op_map.get(req[:op_pos]) - - requirement = req[op_pos:] - if not op: - requirement = req - op = operator.eq - - # In the case we are checking a new requirement on a base requirement (parent != None) we can't accept - # version as '*' (unknown version) unless the requirement is also '*'. - if parent and version == '*' and requirement != '*': - display.warning("Failed to validate the collection requirement '%s:%s' for %s when the existing " - "install does not have a version set, the collection may not work." - % (to_text(self), req, parent)) - continue - elif requirement == '*' or version == '*': - continue - - if not op(SemanticVersion(version), SemanticVersion.from_loose_version(LooseVersion(requirement))): - break - else: - return True - - # The loop was broken early, it does not meet all the requirements - return False - - @staticmethod - def from_tar(b_path, force, parent=None): - if not tarfile.is_tarfile(b_path): - raise AnsibleError("Collection artifact at '%s' is not a valid tar file." % to_native(b_path)) - - info = {} - with tarfile.open(b_path, mode='r') as collection_tar: - for b_member_name, property_name in CollectionRequirement._FILE_MAPPING: - n_member_name = to_native(b_member_name) - try: - member = collection_tar.getmember(n_member_name) - except KeyError: - raise AnsibleError("Collection at '%s' does not contain the required file %s." - % (to_native(b_path), n_member_name)) - - with _tarfile_extract(collection_tar, member) as (dummy, member_obj): - try: - info[property_name] = json.loads(to_text(member_obj.read(), errors='surrogate_or_strict')) - except ValueError: - raise AnsibleError("Collection tar file member %s does not contain a valid json string." 
- % n_member_name) - - meta = info['manifest_file']['collection_info'] - files = info['files_file']['files'] - - namespace = meta['namespace'] - name = meta['name'] - version = meta['version'] - meta = CollectionVersionMetadata(namespace, name, version, None, None, meta['dependencies']) - - if SemanticVersion(version).is_prerelease: - allow_pre_release = True - else: - allow_pre_release = False - - return CollectionRequirement(namespace, name, b_path, None, [version], version, force, parent=parent, - metadata=meta, files=files, allow_pre_releases=allow_pre_release) - - @staticmethod - def from_path(b_path, force, parent=None, fallback_metadata=False, skip=True): - info = CollectionRequirement.collection_info(b_path, fallback_metadata) - - allow_pre_release = False - if 'manifest_file' in info: - manifest = info['manifest_file']['collection_info'] - namespace = manifest['namespace'] - name = manifest['name'] - version = to_text(manifest['version'], errors='surrogate_or_strict') - - try: - _v = SemanticVersion() - _v.parse(version) - if _v.is_prerelease: - allow_pre_release = True - except ValueError: - display.warning("Collection at '%s' does not have a valid version set, falling back to '*'. Found " - "version: '%s'" % (to_text(b_path), version)) - version = '*' - - dependencies = manifest['dependencies'] - else: - if fallback_metadata: - warning = "Collection at '%s' does not have a galaxy.yml or a MANIFEST.json file, cannot detect version." - else: - warning = "Collection at '%s' does not have a MANIFEST.json file, cannot detect version." - display.warning(warning % to_text(b_path)) - parent_dir, name = os.path.split(to_text(b_path, errors='surrogate_or_strict')) - namespace = os.path.split(parent_dir)[1] - - version = '*' - dependencies = {} - - meta = CollectionVersionMetadata(namespace, name, version, None, None, dependencies) - - files = info.get('files_file', {}).get('files', {}) - - return CollectionRequirement(namespace, name, b_path, None, [version], version, force, parent=parent, - metadata=meta, files=files, skip=skip, allow_pre_releases=allow_pre_release) - - @staticmethod - def from_name(collection, apis, requirement, force, parent=None, allow_pre_release=False): - namespace, name = collection.split('.', 1) - galaxy_meta = None - - for api in apis: - if not (requirement == '*' or requirement.startswith('<') or requirement.startswith('>') or - requirement.startswith('!=')): - # Exact requirement - allow_pre_release = True - - if requirement.startswith('='): - requirement = requirement.lstrip('=') - - try: - resp = api.get_collection_version_metadata(namespace, name, requirement) - except GalaxyError as err: - if err.http_code != 404: - raise - versions = [] - else: - galaxy_meta = resp - versions = [resp.version] - else: - versions = api.get_collection_versions(namespace, name) - - if not versions: - display.vvv("Collection '%s' is not available from server %s %s" % (collection, api.name, - api.api_server)) - continue - - display.vvv("Collection '%s' obtained from server %s %s" % (collection, api.name, api.api_server)) - break - else: - raise AnsibleError("Failed to find collection %s:%s" % (collection, requirement)) - - req = CollectionRequirement(namespace, name, None, api, versions, requirement, force, parent=parent, - metadata=galaxy_meta, allow_pre_releases=allow_pre_release) - return req + modified_content = [] # type: List[ModifiedContent] + + # Verify the manifest hash matches before verifying the file manifest + expected_hash = _get_tar_file_hash(b_temp_tar_path, 
'MANIFEST.json') + _verify_file_hash(b_collection_path, 'MANIFEST.json', expected_hash, modified_content) + manifest = _get_json_from_tar_file(b_temp_tar_path, 'MANIFEST.json') + + # Use the manifest to verify the file manifest checksum + file_manifest_data = manifest['file_manifest_file'] + file_manifest_filename = file_manifest_data['name'] + expected_hash = file_manifest_data['chksum_%s' % file_manifest_data['chksum_type']] + + # Verify the file manifest before using it to verify individual files + _verify_file_hash(b_collection_path, file_manifest_filename, expected_hash, modified_content) + file_manifest = _get_json_from_tar_file(b_temp_tar_path, file_manifest_filename) + + # Use the file manifest to verify individual file checksums + for manifest_data in file_manifest['files']: + if manifest_data['ftype'] == 'file': + expected_hash = manifest_data['chksum_%s' % manifest_data['chksum_type']] + _verify_file_hash(b_collection_path, manifest_data['name'], expected_hash, modified_content) + + if modified_content: + display.display( + 'Collection {fqcn!s} contains modified content ' + 'in the following files:'. + format(fqcn=to_text(local_collection.fqcn)), + ) + display.display(to_text(local_collection.fqcn)) + display.vvv(to_text(local_collection.src)) + for content_change in modified_content: + display.display(' %s' % content_change.filename) + display.vvv(" Expected: %s\n Found: %s" % (content_change.expected, content_change.installed)) + # FIXME: Why doesn't this raise a failed return code? + else: + display.vvv( + "Successfully verified that checksums for '{coll!s}' " + 'match the remote collection'. + format(coll=local_collection), + ) -def build_collection(collection_path, output_path, force): +def build_collection(u_collection_path, u_output_path, force): + # type: (Text, Text, bool) -> Text """Creates the Ansible collection artifact in a .tar.gz file. - :param collection_path: The path to the collection to build. This should be the directory that contains the + :param u_collection_path: The path to the collection to build. This should be the directory that contains the galaxy.yml file. - :param output_path: The path to create the collection build artifact. This should be a directory. + :param u_output_path: The path to create the collection build artifact. This should be a directory. :param force: Whether to overwrite an existing collection build artifact or fail. :return: The path to the collection build artifact. """ - b_collection_path = to_bytes(collection_path, errors='surrogate_or_strict') - b_galaxy_path = get_galaxy_metadata_path(b_collection_path) - if not os.path.exists(b_galaxy_path): - raise AnsibleError("The collection galaxy.yml path '%s' does not exist." 
% to_native(b_galaxy_path)) - - info = CollectionRequirement.galaxy_metadata(b_collection_path) - - collection_manifest = info['manifest_file'] - collection_meta = collection_manifest['collection_info'] - file_manifest = info['files_file'] + b_collection_path = to_bytes(u_collection_path, errors='surrogate_or_strict') + try: + collection_meta = _get_meta_from_src_dir(b_collection_path) + except LookupError as lookup_err: + raise_from(AnsibleError(to_native(lookup_err)), lookup_err) + + collection_manifest = _build_manifest(**collection_meta) + file_manifest = _build_files_manifest( + b_collection_path, + collection_meta['namespace'], # type: ignore[arg-type] + collection_meta['name'], # type: ignore[arg-type] + collection_meta['build_ignore'], # type: ignore[arg-type] + ) - collection_output = os.path.join(output_path, "%s-%s-%s.tar.gz" % (collection_meta['namespace'], - collection_meta['name'], - collection_meta['version'])) + artifact_tarball_file_name = '{ns!s}-{name!s}-{ver!s}.tar.gz'.format( + name=collection_meta['name'], + ns=collection_meta['namespace'], + ver=collection_meta['version'], + ) + b_collection_output = os.path.join( + to_bytes(u_output_path), + to_bytes(artifact_tarball_file_name, errors='surrogate_or_strict'), + ) - b_collection_output = to_bytes(collection_output, errors='surrogate_or_strict') if os.path.exists(b_collection_output): if os.path.isdir(b_collection_output): raise AnsibleError("The output collection artifact '%s' already exists, " - "but is a directory - aborting" % to_native(collection_output)) + "but is a directory - aborting" % to_native(b_collection_output)) elif not force: raise AnsibleError("The file '%s' already exists. You can use --force to re-create " - "the collection artifact." % to_native(collection_output)) + "the collection artifact." % to_native(b_collection_output)) - _build_collection_tar(b_collection_path, b_collection_output, collection_manifest, file_manifest) + collection_output = _build_collection_tar(b_collection_path, b_collection_output, collection_manifest, file_manifest) return collection_output -def download_collections(collections, output_path, apis, validate_certs, no_deps, allow_pre_release): +def download_collections( + collections, # type: Iterable[Requirement] + output_path, # type: str + apis, # type: Iterable[GalaxyAPI] + no_deps, # type: bool + allow_pre_release, # type: bool + artifacts_manager, # type: ConcreteArtifactsManager +): # type: (...) -> None """Download Ansible collections as their tarball from a Galaxy server to the path specified and creates a requirements file of the downloaded requirements to be used for an install. @@ -594,41 +278,88 @@ def download_collections(collections, output_path, apis, validate_certs, no_deps :param no_deps: Ignore any collection dependencies and only download the base requirements. :param allow_pre_release: Do not ignore pre-release versions when selecting the latest. 
""" - with _tempdir() as b_temp_path: - with _display_progress("Process download dependency map"): - dep_map = _build_dependency_map(collections, [], b_temp_path, apis, validate_certs, True, True, no_deps, - allow_pre_release=allow_pre_release) + with _display_progress("Process download dependency map"): + dep_map = _resolve_depenency_map( + set(collections), + galaxy_apis=apis, + preferred_candidates=None, + concrete_artifacts_manager=artifacts_manager, + no_deps=no_deps, + allow_pre_release=allow_pre_release, + ) + + b_output_path = to_bytes(output_path, errors='surrogate_or_strict') + + requirements = [] + with _display_progress( + "Starting collection download process to '{path!s}'". + format(path=output_path), + ): + for fqcn, concrete_coll_pin in dep_map.copy().items(): # FIXME: move into the provider + if concrete_coll_pin.is_virtual: + display.v( + 'Virtual collection {coll!s} is not downloadable'. + format(coll=to_text(concrete_coll_pin)), + ) + continue - requirements = [] - with _display_progress( - "Starting collection download process to '{path!s}'". - format(path=output_path), - ): - for name, requirement in dep_map.items(): - collection_filename = "%s-%s-%s.tar.gz" % (requirement.namespace, requirement.name, - requirement.latest_version) - dest_path = os.path.join(output_path, collection_filename) - requirements.append({'name': collection_filename, 'version': requirement.latest_version}) - - display.display("Downloading collection '%s' to '%s'" % (name, dest_path)) - - if requirement.api is None and requirement.b_path and os.path.isfile(requirement.b_path): - shutil.copy(requirement.b_path, to_bytes(dest_path, errors='surrogate_or_strict')) - elif requirement.api is None and requirement.b_path: - temp_path = to_text(b_temp_path, errors='surrogate_or_string') - temp_download_path = build_collection(requirement.b_path, temp_path, True) - shutil.move(to_bytes(temp_download_path, errors='surrogate_or_strict'), - to_bytes(dest_path, errors='surrogate_or_strict')) - else: - b_temp_download_path = requirement.download(b_temp_path) - shutil.move(b_temp_download_path, to_bytes(dest_path, errors='surrogate_or_strict')) + display.display( + u"Downloading collection '{coll!s}' to '{path!s}'". + format(coll=to_text(concrete_coll_pin), path=to_text(b_output_path)), + ) + + b_src_path = ( + artifacts_manager.get_artifact_path + if concrete_coll_pin.is_concrete_artifact + else artifacts_manager.get_galaxy_artifact_path + )(concrete_coll_pin) - display.display("%s (%s) was downloaded successfully" % (name, requirement.latest_version)) + b_dest_path = os.path.join( + b_output_path, + os.path.basename(b_src_path), + ) + + if concrete_coll_pin.is_dir: + b_dest_path = to_bytes( + build_collection( + to_text(b_src_path, errors='surrogate_or_strict'), + to_text(output_path, errors='surrogate_or_strict'), + force=True, + ), + errors='surrogate_or_strict', + ) + else: + shutil.copy(to_native(b_src_path), to_native(b_dest_path)) - requirements_path = os.path.join(output_path, 'requirements.yml') - display.display("Writing requirements.yml file of downloaded collections to '%s'" % requirements_path) - with open(to_bytes(requirements_path, errors='surrogate_or_strict'), mode='wb') as req_fd: - req_fd.write(to_bytes(yaml.safe_dump({'collections': requirements}), errors='surrogate_or_strict')) + display.display( + "Collection '{coll!s}' was downloaded successfully". 
+ format(coll=concrete_coll_pin), + ) + requirements.append({ + # FIXME: Consider using a more specific upgraded format + # FIXME: having FQCN in the name field, with src field + # FIXME: pointing to the file path, and explicitly set + # FIXME: type. If version and name are set, it'd + # FIXME: perform validation against the actual metadata + # FIXME: in the artifact src points at. + 'name': to_native(os.path.basename(b_dest_path)), + 'version': concrete_coll_pin.ver, + }) + + requirements_path = os.path.join(output_path, 'requirements.yml') + b_requirements_path = to_bytes( + requirements_path, errors='surrogate_or_strict', + ) + display.display( + u'Writing requirements.yml file of downloaded collections ' + "to '{path!s}'".format(path=to_text(requirements_path)), + ) + yaml_bytes = to_bytes( + yaml.safe_dump({'collections': requirements}), + errors='surrogate_or_strict', + ) + with open(b_requirements_path, mode='wb') as req_fd: + req_fd.write(yaml_bytes) def publish_collection(collection_path, api, wait, timeout): @@ -668,11 +399,20 @@ def publish_collection(collection_path, api, wait, timeout): % (api.name, api.api_server, import_uri)) -def install_collections(collections, output_path, apis, validate_certs, ignore_errors, no_deps, force, force_deps, - allow_pre_release=False): +def install_collections( + collections, # type: Iterable[Requirement] + output_path, # type: str + apis, # type: Iterable[GalaxyAPI] + ignore_errors, # type: bool + no_deps, # type: bool + force, # type: bool + force_deps, # type: bool + allow_pre_release, # type: bool + artifacts_manager, # type: ConcreteArtifactsManager +): # type: (...) -> None """Install Ansible collections to the path specified. - :param collections: The collections to install, should be a list of tuples with (name, requirement, Galaxy server). + :param collections: The collections to install. :param output_path: The path to install the collections to. :param apis: A list of GalaxyAPIs to query when searching for a collection. :param validate_certs: Whether to validate the certificates if downloading a tarball. @@ -681,27 +421,132 @@ def install_collections(collections, output_path, apis, validate_certs, ignore_e :param force: Re-install a collection if it has already been installed. :param force_deps: Re-install a collection as well as its dependencies if they have already been installed. """ - existing_collections = find_existing_collections(output_path, fallback_metadata=True) + existing_collections = { + Requirement(coll.fqcn, coll.ver, coll.src, coll.type) + for coll in find_existing_collections(output_path, artifacts_manager) + } - with _tempdir() as b_temp_path: - with _display_progress("Process install dependency map"): - dependency_map = _build_dependency_map(collections, existing_collections, b_temp_path, apis, - validate_certs, force, force_deps, no_deps, - allow_pre_release=allow_pre_release) + unsatisfied_requirements = set( + chain.from_iterable( + ( + Requirement.from_dir_path(sub_coll, artifacts_manager) + for sub_coll in ( + artifacts_manager. + get_direct_collection_dependencies(install_req). 
+ keys() + ) + ) + if install_req.is_subdirs else (install_req, ) + for install_req in collections + ), + ) + requested_requirements_names = {req.fqcn for req in unsatisfied_requirements} + + # NOTE: Don't attempt to reevaluate already installed deps + # NOTE: unless `--force` or `--force-with-deps` is passed + unsatisfied_requirements -= set() if force or force_deps else { + req + for req in unsatisfied_requirements + for exs in existing_collections + if req.fqcn == exs.fqcn and meets_requirements(exs.ver, req.ver) + } - with _display_progress("Starting collection install process"): - for collection in dependency_map.values(): - try: - collection.install(output_path, b_temp_path) - except AnsibleError as err: - if ignore_errors: - display.warning("Failed to install collection %s but skipping due to --ignore-errors being set. " - "Error: %s" % (to_text(collection), to_text(err))) - else: - raise + if not unsatisfied_requirements: + display.display( + 'Nothing to do. All requested collections are already ' + 'installed. If you want to reinstall them, ' + 'consider using `--force`.' + ) + return + + # FIXME: This probably needs to be improved to + # FIXME: properly match differing src/type. + existing_non_requested_collections = { + coll for coll in existing_collections + if coll.fqcn not in requested_requirements_names + } + + preferred_requirements = ( + [] if force_deps + else existing_non_requested_collections if force + else existing_collections + ) + preferred_collections = { + Candidate(coll.fqcn, coll.ver, coll.src, coll.type) + for coll in preferred_requirements + } + with _display_progress("Process install dependency map"): + try: + dependency_map = _resolve_depenency_map( + collections, + galaxy_apis=apis, + preferred_candidates=preferred_collections, + concrete_artifacts_manager=artifacts_manager, + no_deps=no_deps, + allow_pre_release=allow_pre_release, + ) + except InconsistentCandidate as inconsistent_candidate_exc: + # FIXME: Processing this error is hacky and should be removed along + # FIXME: with implementing the automatic replacement for installed + # FIXME: collections. + if not all( + inconsistent_candidate_exc.candidate.fqcn == r.fqcn + for r in inconsistent_candidate_exc.criterion.iter_requirement() + ): + raise + + req_info = inconsistent_candidate_exc.criterion.information[0] + force_flag = ( + '--force' if req_info.parent is None + else '--force-with-deps' + ) + raise_from( + AnsibleError( + 'Cannot meet requirement {collection!s} as it is already ' + "installed at version '{installed_ver!s}'. " + 'Use {force_flag!s} to overwrite'.format( + collection=req_info.requirement, + force_flag=force_flag, + installed_ver=inconsistent_candidate_exc.candidate.ver, + ) + ), + inconsistent_candidate_exc, + ) + + with _display_progress("Starting collection install process"): + for fqcn, concrete_coll_pin in dependency_map.items(): + if concrete_coll_pin.is_virtual: + display.vvvv( + "Skipping '{coll!s}' as it is virtual". + format(coll=to_text(concrete_coll_pin)), + ) + continue + + if concrete_coll_pin in preferred_collections: + display.display( + "Skipping '{coll!s}' as it is already installed". + format(coll=to_text(concrete_coll_pin)), + ) + continue + + try: + install(concrete_coll_pin, output_path, artifacts_manager) + except AnsibleError as err: + if ignore_errors: + display.warning( + 'Failed to install collection {coll!s} but skipping ' + 'due to --ignore-errors being set. Error: {error!s}'. 
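#
# [Editor's aside: usage sketch, not part of the patch] With the new
# `artifacts_manager` argument, a caller drives the installer roughly like
# this (assuming a configured GalaxyAPI named `api`; the FQCN and paths are
# made up, the parameter order follows the signature above):
#
#     from ansible.galaxy.collection import install_collections
#     from ansible.galaxy.collection.concrete_artifact_manager import (
#         ConcreteArtifactsManager,
#     )
#     from ansible.galaxy.dependency_resolution.dataclasses import Requirement
#
#     with ConcreteArtifactsManager.under_tmpdir('/tmp') as mgr:
#         install_collections(
#             [Requirement('my_ns.my_coll', '>=1.0.0', None, 'galaxy')],
#             '/usr/share/ansible/collections/ansible_collections',
#             [api],
#             ignore_errors=False, no_deps=False,
#             force=False, force_deps=False,
#             allow_pre_release=False,
#             artifacts_manager=mgr,
#         )
#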
+ format( + coll=to_text(concrete_coll_pin), + error=to_text(err), + ) + ) + else: + raise -def validate_collection_name(name): +# NOTE: imported in ansible.cli.galaxy +def validate_collection_name(name): # type: (str) -> str """Validates the collection name as an input from the user or a requirements file fit the requirements. :param name: The input name with optional range specifier split by ':'. @@ -717,7 +562,8 @@ def validate_collection_name(name): "characters from [a-zA-Z0-9_] only." % name) -def validate_collection_path(collection_path): +# NOTE: imported in ansible.cli.galaxy +def validate_collection_path(collection_path): # type: (str) -> str """Ensure a given path ends with 'ansible_collections' :param collection_path: The path that should end in 'ansible_collections' @@ -730,73 +576,106 @@ def validate_collection_path(collection_path): return collection_path -def verify_collections(collections, search_paths, apis, validate_certs, ignore_errors, allow_pre_release=False): +def verify_collections( + collections, # type: Iterable[Requirement] + search_paths, # type: Iterable[str] + apis, # type: Iterable[GalaxyAPI] + ignore_errors, # type: bool + artifacts_manager, # type: ConcreteArtifactsManager +): # type: (...) -> None + r"""Verify the integrity of locally installed collections. + + :param collections: The collections to check. + :param search_paths: Locations for the local collection lookup. + :param apis: A list of GalaxyAPIs to query when searching for a collection. + :param ignore_errors: Whether to ignore any errors when verifying the collection. + :param artifacts_manager: Artifacts manager. + """ + api_proxy = MultiGalaxyAPIProxy(apis, artifacts_manager) with _display_progress(): - with _tempdir() as b_temp_path: - for collection in collections: + for collection in collections: + try: + if collection.is_concrete_artifact: + raise AnsibleError( + message="'{coll_type!s}' type is not supported. " + 'The format namespace.name is expected.'. + format(coll_type=collection.type) + ) + + # NOTE: Verify local collection exists before + # NOTE: downloading its source artifact from + # NOTE: a galaxy server. + for search_path in search_paths: + b_search_path = to_bytes( + os.path.join( + search_path, + collection.namespace, collection.name, + ), + errors='surrogate_or_strict', + ) + if not os.path.isdir(b_search_path): + continue + + local_collection = Candidate.from_dir_path( + b_search_path, artifacts_manager, + ) + break + else: + raise AnsibleError(message='Collection %s is not installed in any of the collection paths.' % collection.fqcn) + + remote_collection = Candidate( + collection.fqcn, + collection.ver if collection.ver != '*' + else local_collection.ver, + None, 'galaxy', + ) + + # Download collection on a galaxy server for comparison try: + # NOTE: Trigger the lookup. If found, it'll cache + # NOTE: download URL and token in artifact manager. + api_proxy.get_collection_version_metadata( + remote_collection, + ) + except AnsibleError as e: # FIXME: does this actually emit any errors? + # FIXME: extract the actual message and adjust this: + expected_error_msg = ( + 'Failed to find collection {coll.fqcn!s}:{coll.ver!s}'. + format(coll=collection) + ) + if e.message == expected_error_msg: + raise AnsibleError( + 'Failed to find remote collection ' + "'{coll!s}' on any of the galaxy servers". 
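#
# [Editor's aside: behavior sketch] The two validators above stay
# import-stable for the CLI. Expected behavior, assuming the standard
# implementations (their bodies are elided from this hunk):
#
#     validate_collection_name('my_ns.my_coll:>=1.0.0')
#     # -> returns the input unchanged; raises AnsibleError for names with
#     #    characters outside [a-zA-Z0-9_]
#     validate_collection_path('/opt/collections')
#     # -> '/opt/collections/ansible_collections'
#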
+ format(coll=collection) + ) + raise + + verify_local_collection( + local_collection, remote_collection, + artifacts_manager, + ) - local_collection = None - b_collection = to_bytes(collection[0], errors='surrogate_or_strict') - - if os.path.isfile(b_collection) or urlparse(collection[0]).scheme.lower() in ['http', 'https'] or len(collection[0].split('.')) != 2: - raise AnsibleError(message="'%s' is not a valid collection name. The format namespace.name is expected." % collection[0]) - - collection_name = collection[0] - namespace, name = collection_name.split('.') - collection_version = collection[1] - - # Verify local collection exists before downloading it from a galaxy server - for search_path in search_paths: - b_search_path = to_bytes(os.path.join(search_path, namespace, name), errors='surrogate_or_strict') - if os.path.isdir(b_search_path): - if not os.path.isfile(os.path.join(to_text(b_search_path, errors='surrogate_or_strict'), 'MANIFEST.json')): - raise AnsibleError( - message="Collection %s does not appear to have a MANIFEST.json. " % collection_name + - "A MANIFEST.json is expected if the collection has been built and installed via ansible-galaxy." - ) - local_collection = CollectionRequirement.from_path(b_search_path, False) - break - if local_collection is None: - raise AnsibleError(message='Collection %s is not installed in any of the collection paths.' % collection_name) - - # Download collection on a galaxy server for comparison - try: - remote_collection = CollectionRequirement.from_name(collection_name, apis, collection_version, False, parent=None, - allow_pre_release=allow_pre_release) - except AnsibleError as e: - if e.message == 'Failed to find collection %s:%s' % (collection[0], collection[1]): - raise AnsibleError('Failed to find remote collection %s:%s on any of the galaxy servers' % (collection[0], collection[1])) - raise - - download_url = remote_collection.metadata.download_url - headers = {} - remote_collection.api._add_auth_token(headers, download_url, required=False) - b_temp_tar_path = _download_file(download_url, b_temp_path, None, validate_certs, headers=headers) - - local_collection.verify(remote_collection, search_path, b_temp_tar_path) - - except AnsibleError as err: - if ignore_errors: - display.warning("Failed to verify collection %s but skipping due to --ignore-errors being set. " - "Error: %s" % (collection[0], to_text(err))) - else: - raise + except AnsibleError as err: + if ignore_errors: + display.warning( + "Failed to verify collection '{coll!s}' but skipping " + 'due to --ignore-errors being set. ' + 'Error: {err!s}'. 
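#
# [Editor's aside: usage sketch, not part of the patch] `verify_collections`
# now accepts only namespace.name requirements plus an artifacts manager
# (concrete artifacts raise AnsibleError at the top of the loop above); e.g.,
# with a configured GalaxyAPI named `api` and a made-up FQCN:
#
#     with ConcreteArtifactsManager.under_tmpdir('/tmp') as mgr:
#         verify_collections(
#             collections=[Requirement('my_ns.my_coll', '1.2.3', None, 'galaxy')],
#             search_paths=['/usr/share/ansible/collections/ansible_collections'],
#             apis=[api],
#             ignore_errors=False,
#             artifacts_manager=mgr,
#         )
#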
+ format(coll=collection, err=to_text(err)), + ) + else: + raise @contextmanager def _tempdir(): b_temp_path = tempfile.mkdtemp(dir=to_bytes(C.DEFAULT_LOCAL_TMP, errors='surrogate_or_strict')) - yield b_temp_path - shutil.rmtree(b_temp_path) - - -@contextmanager -def _tarfile_extract(tar, member): - tar_obj = tar.extractfile(member) - yield member, tar_obj - tar_obj.close() + try: + yield b_temp_path + finally: + shutil.rmtree(b_temp_path) @contextmanager @@ -867,70 +746,21 @@ def _display_progress(msg=None): display = old_display -def _get_galaxy_yml(b_galaxy_yml_path): - meta_info = get_collections_galaxy_meta_info() - - mandatory_keys = set() - string_keys = set() - list_keys = set() - dict_keys = set() - - for info in meta_info: - if info.get('required', False): - mandatory_keys.add(info['key']) - - key_list_type = { - 'str': string_keys, - 'list': list_keys, - 'dict': dict_keys, - }[info.get('type', 'str')] - key_list_type.add(info['key']) - - all_keys = frozenset(list(mandatory_keys) + list(string_keys) + list(list_keys) + list(dict_keys)) - - try: - with open(b_galaxy_yml_path, 'rb') as g_yaml: - galaxy_yml = yaml.safe_load(g_yaml) - except YAMLError as err: - raise AnsibleError("Failed to parse the galaxy.yml at '%s' with the following error:\n%s" - % (to_native(b_galaxy_yml_path), to_native(err))) - - set_keys = set(galaxy_yml.keys()) - missing_keys = mandatory_keys.difference(set_keys) - if missing_keys: - raise AnsibleError("The collection galaxy.yml at '%s' is missing the following mandatory keys: %s" - % (to_native(b_galaxy_yml_path), ", ".join(sorted(missing_keys)))) - - extra_keys = set_keys.difference(all_keys) - if len(extra_keys) > 0: - display.warning("Found unknown keys in collection galaxy.yml at '%s': %s" - % (to_text(b_galaxy_yml_path), ", ".join(extra_keys))) - - # Add the defaults if they have not been set - for optional_string in string_keys: - if optional_string not in galaxy_yml: - galaxy_yml[optional_string] = None - - for optional_list in list_keys: - list_val = galaxy_yml.get(optional_list, None) +def _verify_file_hash(b_path, filename, expected_hash, error_queue): + b_file_path = to_bytes(os.path.join(to_text(b_path), filename), errors='surrogate_or_strict') - if list_val is None: - galaxy_yml[optional_list] = [] - elif not isinstance(list_val, list): - galaxy_yml[optional_list] = [list_val] - - for optional_dict in dict_keys: - if optional_dict not in galaxy_yml: - galaxy_yml[optional_dict] = {} - - # license is a builtin var in Python, to avoid confusion we just rename it to license_ids - galaxy_yml['license_ids'] = galaxy_yml['license'] - del galaxy_yml['license'] + if not os.path.isfile(b_file_path): + actual_hash = None + else: + with open(b_file_path, mode='rb') as file_object: + actual_hash = _consume_file(file_object) - return galaxy_yml + if expected_hash != actual_hash: + error_queue.append(ModifiedContent(filename=filename, expected=expected_hash, installed=actual_hash)) def _build_files_manifest(b_collection_path, namespace, name, ignore_patterns): + # type: (bytes, str, str, List[str]) -> FilesManifestType # We always ignore .pyc and .retry files as well as some well known version control directories. 
The ignore # patterns can be extended by the build_ignore key in galaxy.yml b_ignore_patterns = [ @@ -963,7 +793,7 @@ def _build_files_manifest(b_collection_path, namespace, name, ignore_patterns): }, ], 'format': MANIFEST_FORMAT, - } + } # type: FilesManifestType def _walk(b_path, b_top_level_dir): for b_item in os.listdir(b_path): @@ -1014,9 +844,9 @@ def _build_files_manifest(b_collection_path, namespace, name, ignore_patterns): return manifest -def _build_manifest(namespace, name, version, authors, readme, tags, description, license_ids, license_file, +# FIXME: accept a dict produced from `galaxy.yml` instead of separate args +def _build_manifest(namespace, name, version, authors, readme, tags, description, license_file, dependencies, repository, documentation, homepage, issues, **kwargs): - manifest = { 'collection_info': { 'namespace': namespace, @@ -1026,8 +856,8 @@ def _build_manifest(namespace, name, version, authors, readme, tags, description 'readme': readme, 'tags': tags, 'description': description, - 'license': license_ids, - 'license_file': license_file if license_file else None, # Handle galaxy.yml having an empty string (None) + 'license': kwargs['license'], + 'license_file': license_file or None, # Handle galaxy.yml having an empty string (None) 'dependencies': dependencies, 'repository': repository, 'documentation': documentation, @@ -1047,7 +877,12 @@ def _build_manifest(namespace, name, version, authors, readme, tags, description return manifest -def _build_collection_tar(b_collection_path, b_tar_path, collection_manifest, file_manifest): +def _build_collection_tar( + b_collection_path, # type: bytes + b_tar_path, # type: bytes + collection_manifest, # type: CollectionManifestType + file_manifest, # type: FilesManifestType +): # type: (...) -> Text """Build a tar.gz collection artifact from the manifest data.""" files_manifest_json = to_bytes(json.dumps(file_manifest, indent=True), errors='surrogate_or_strict') collection_manifest['file_manifest_file']['chksum_sha256'] = secure_hash_s(files_manifest_json, hash_func=sha256) @@ -1062,11 +897,11 @@ def _build_collection_tar(b_collection_path, b_tar_path, collection_manifest, fi b_io = BytesIO(b) tar_info = tarfile.TarInfo(name) tar_info.size = len(b) - tar_info.mtime = time.time() + tar_info.mtime = int(time.time()) tar_info.mode = 0o0644 tar_file.addfile(tarinfo=tar_info, fileobj=b_io) - for file_info in file_manifest['files']: + for file_info in file_manifest['files']: # type: ignore[union-attr] if file_info['name'] == '.': continue @@ -1097,12 +932,19 @@ def _build_collection_tar(b_collection_path, b_tar_path, collection_manifest, fi continue # Dealing with a normal file, just add it by name. 
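#
# [Editor's aside: illustrative stand-in, not part of the patch] The
# `filter=reset_stat` callback passed to `tar_file.add()` below is defined in
# context not shown in this hunk; it normalizes ownership and file modes so
# rebuilt artifacts do not leak local uid/gid bits. A minimal stand-in with
# the same shape would be:
#
#     def reset_stat(tarinfo):
#         tarinfo.uid = tarinfo.gid = 0
#         tarinfo.uname = tarinfo.gname = ''
#         tarinfo.mode = 0o0755 if tarinfo.isdir() else 0o0644
#         return tarinfo
#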
- tar_file.add(os.path.realpath(b_src_path), arcname=filename, recursive=False, filter=reset_stat) + tar_file.add( + to_native(os.path.realpath(b_src_path)), + arcname=filename, + recursive=False, + filter=reset_stat, + ) - shutil.copy(b_tar_filepath, b_tar_path) + shutil.copy(to_native(b_tar_filepath), to_native(b_tar_path)) collection_name = "%s.%s" % (collection_manifest['collection_info']['namespace'], collection_manifest['collection_info']['name']) - display.display('Created collection for %s at %s' % (collection_name, to_text(b_tar_path))) + tar_path = to_text(b_tar_path) + display.display(u'Created collection for %s at %s' % (collection_name, tar_path)) + return tar_path def _build_collection_dir(b_collection_path, b_collection_output, collection_manifest, file_manifest): @@ -1146,263 +988,159 @@ def _build_collection_dir(b_collection_path, b_collection_output, collection_man shutil.copyfile(src_file, dest_file) os.chmod(dest_file, mode) + collection_output = to_text(b_collection_output) + return collection_output -def find_existing_collections(path, fallback_metadata=False): - collections = [] +def find_existing_collections(path, artifacts_manager): + """Locate all collections under a given path. + :param path: Collection dirs layout search path. + :param artifacts_manager: Artifacts manager. + """ b_path = to_bytes(path, errors='surrogate_or_strict') + + # FIXME: consider using `glob.glob()` to simplify looping for b_namespace in os.listdir(b_path): b_namespace_path = os.path.join(b_path, b_namespace) if os.path.isfile(b_namespace_path): continue + # FIXME: consider feeding b_namespace_path to Candidate.from_dir_path to get subdirs automatically for b_collection in os.listdir(b_namespace_path): b_collection_path = os.path.join(b_namespace_path, b_collection) - if os.path.isdir(b_collection_path): - req = CollectionRequirement.from_path(b_collection_path, False, fallback_metadata=fallback_metadata) - display.vvv("Found installed collection %s:%s at '%s'" % (to_text(req), req.latest_version, - to_text(b_collection_path))) - collections.append(req) - - return collections - - -def _build_dependency_map(collections, existing_collections, b_temp_path, apis, validate_certs, force, force_deps, - no_deps, allow_pre_release=False): - dependency_map = {} - - # First build the dependency map on the actual requirements - for name, version, source, req_type in collections: - _get_collection_info(dependency_map, existing_collections, name, version, source, b_temp_path, apis, - validate_certs, (force or force_deps), allow_pre_release=allow_pre_release, req_type=req_type) - - checked_parents = set([to_text(c) for c in dependency_map.values() if c.skip]) - while len(dependency_map) != len(checked_parents): - while not no_deps: # Only parse dependencies if no_deps was not set - parents_to_check = set(dependency_map.keys()).difference(checked_parents) + if not os.path.isdir(b_collection_path): + continue - deps_exhausted = True - for parent in parents_to_check: - parent_info = dependency_map[parent] + try: + req = Candidate.from_dir_path_as_unknown( + b_collection_path, + artifacts_manager, + ) + except ValueError as val_err: + raise_from(AnsibleError(val_err), val_err) - if parent_info.dependencies: - deps_exhausted = False - for dep_name, dep_requirement in parent_info.dependencies.items(): - _get_collection_info(dependency_map, existing_collections, dep_name, dep_requirement, - None, b_temp_path, apis, validate_certs, force_deps, - parent=parent, allow_pre_release=allow_pre_release) + display.vvv( 
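#
# [Editor's aside: usage sketch, not part of the patch]
# `find_existing_collections`, being rewritten here, is now a generator of
# Candidate objects rather than a list builder, so callers can stream
# results (the path and manager name `mgr` are made up):
#
#     for cand in find_existing_collections(
#         '/usr/share/ansible/collections/ansible_collections', mgr,
#     ):
#         print(cand.fqcn, cand.ver)
#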
+ u"Found installed collection {coll!s} at '{path!s}'". + format(coll=to_text(req), path=to_text(req.src)) + ) + yield req - checked_parents.add(parent) - # No extra dependencies were resolved, exit loop - if deps_exhausted: - break +def install(collection, path, artifacts_manager): # FIXME: mv to dataclasses? + # type: (Candidate, str, ConcreteArtifactsManager) -> None + """Install a collection under a given path. - # Now we have resolved the deps to our best extent, now select the latest version for collections with - # multiple versions found and go from there - deps_not_checked = set(dependency_map.keys()).difference(checked_parents) - for collection in deps_not_checked: - dependency_map[collection].set_latest_version() - if no_deps or len(dependency_map[collection].dependencies) == 0: - checked_parents.add(collection) - - return dependency_map - - -def _collections_from_scm(collection, requirement, b_temp_path, force, parent=None): - """Returns a list of collections found in the repo. If there is a galaxy.yml in the collection then just return - the specific collection. Otherwise, check each top-level directory for a galaxy.yml. - - :param collection: URI to a git repo - :param requirement: The version of the artifact - :param b_temp_path: The temporary path to the archive of a collection - :param force: Whether to overwrite an existing collection or fail - :param parent: The name of the parent collection - :raises AnsibleError: if nothing found - :return: List of CollectionRequirement objects - :rtype: list + :param collection: Collection to be installed. + :param path: Collection dirs layout path. + :param artifacts_manager: Artifacts manager. """ + b_artifact_path = ( + artifacts_manager.get_artifact_path if collection.is_concrete_artifact + else artifacts_manager.get_galaxy_artifact_path + )(collection) - reqs = [] - name, version, path, fragment = parse_scm(collection, requirement) - b_repo_root = to_bytes(name, errors='surrogate_or_strict') - - b_collection_path = os.path.join(b_temp_path, b_repo_root) - if fragment: - b_fragment = to_bytes(fragment, errors='surrogate_or_strict') - b_collection_path = os.path.join(b_collection_path, b_fragment) - - b_galaxy_path = get_galaxy_metadata_path(b_collection_path) - - err = ("%s appears to be an SCM collection source, but the required galaxy.yml was not found. 
" - "Append #path/to/collection/ to your URI (before the comma separated version, if one is specified) " - "to point to a directory containing the galaxy.yml or directories of collections" % collection) - - display.vvvvv("Considering %s as a possible path to a collection's galaxy.yml" % b_galaxy_path) - if os.path.exists(b_galaxy_path): - return [CollectionRequirement.from_path(b_collection_path, force, parent, fallback_metadata=True, skip=False)] - - if not os.path.isdir(b_collection_path) or not os.listdir(b_collection_path): - raise AnsibleError(err) - - for b_possible_collection in os.listdir(b_collection_path): - b_collection = os.path.join(b_collection_path, b_possible_collection) - if not os.path.isdir(b_collection): - continue - b_galaxy = get_galaxy_metadata_path(b_collection) - display.vvvvv("Considering %s as a possible path to a collection's galaxy.yml" % b_galaxy) - if os.path.exists(b_galaxy): - reqs.append(CollectionRequirement.from_path(b_collection, force, parent, fallback_metadata=True, skip=False)) - if not reqs: - raise AnsibleError(err) - - return reqs - - -def _get_collection_info(dep_map, existing_collections, collection, requirement, source, b_temp_path, apis, - validate_certs, force, parent=None, allow_pre_release=False, req_type=None): - dep_msg = "" - if parent: - dep_msg = " - as dependency of %s" % parent - display.vvv("Processing requirement collection '%s'%s" % (to_text(collection), dep_msg)) - - b_tar_path = None - - is_file = ( - req_type == 'file' or - (not req_type and os.path.isfile(to_bytes(collection, errors='surrogate_or_strict'))) - ) - - is_url = ( - req_type == 'url' or - (not req_type and urlparse(collection).scheme.lower() in ['http', 'https']) + collection_path = os.path.join(path, collection.namespace, collection.name) + b_collection_path = to_bytes(collection_path, errors='surrogate_or_strict') + display.display( + u"Installing '{coll!s}' to '{path!s}'". + format(coll=to_text(collection), path=collection_path), ) - is_scm = ( - req_type == 'git' or - (not req_type and not b_tar_path and collection.startswith(('git+', 'git@'))) - ) + if os.path.exists(b_collection_path): + shutil.rmtree(b_collection_path) - if is_file: - display.vvvv("Collection requirement '%s' is a tar artifact" % to_text(collection)) - b_tar_path = to_bytes(collection, errors='surrogate_or_strict') - elif is_url: - display.vvvv("Collection requirement '%s' is a URL to a tar artifact" % collection) - try: - b_tar_path = _download_file(collection, b_temp_path, None, validate_certs) - except urllib_error.URLError as err: - raise AnsibleError("Failed to download collection tar from '%s': %s" - % (to_native(collection), to_native(err))) - - if is_scm: - if not collection.startswith('git'): - collection = 'git+' + collection - - name, version, path, fragment = parse_scm(collection, requirement) - b_tar_path = scm_archive_collection(path, name=name, version=version) - - with tarfile.open(b_tar_path, mode='r') as collection_tar: - collection_tar.extractall(path=to_text(b_temp_path)) - - # Ignore requirement if it is set (it must follow semantic versioning, unlike a git version, which is any tree-ish) - # If the requirement was the only place version was set, requirement == version at this point - if requirement not in {"*", ""} and requirement != version: - display.warning( - "The collection {0} appears to be a git repository and two versions were provided: '{1}', and '{2}'. 
" - "The version {2} is being disregarded.".format(collection, version, requirement) - ) - requirement = "*" - - reqs = _collections_from_scm(collection, requirement, b_temp_path, force, parent) - for req in reqs: - collection_info = get_collection_info_from_req(dep_map, req) - update_dep_map_collection_info(dep_map, existing_collections, collection_info, parent, requirement) + if collection.is_dir: + install_src(collection, b_artifact_path, b_collection_path, artifacts_manager) else: - if b_tar_path: - req = CollectionRequirement.from_tar(b_tar_path, force, parent=parent) - collection_info = get_collection_info_from_req(dep_map, req) - else: - validate_collection_name(collection) - - display.vvvv("Collection requirement '%s' is the name of a collection" % collection) - if collection in dep_map: - collection_info = dep_map[collection] - collection_info.add_requirement(parent, requirement) - else: - apis = [source] if source else apis - collection_info = CollectionRequirement.from_name(collection, apis, requirement, force, parent=parent, - allow_pre_release=allow_pre_release) + install_artifact(b_artifact_path, b_collection_path, artifacts_manager._b_working_directory) - update_dep_map_collection_info(dep_map, existing_collections, collection_info, parent, requirement) + display.display( + '{coll!s} was installed successfully'. + format(coll=to_text(collection)), + ) -def get_collection_info_from_req(dep_map, collection): - collection_name = to_text(collection) - if collection_name in dep_map: - collection_info = dep_map[collection_name] - collection_info.add_requirement(None, collection.latest_version) - else: - collection_info = collection - return collection_info +def install_artifact(b_coll_targz_path, b_collection_path, b_temp_path): + """Install a collection from tarball under a given path. + :param b_coll_targz_path: Collection tarball to be installed. + :param b_collection_path: Collection dirs layout path. + :param b_temp_path: Temporary dir path. 
+ """ + try: + with tarfile.open(b_coll_targz_path, mode='r') as collection_tar: + files_member_obj = collection_tar.getmember('FILES.json') + with _tarfile_extract(collection_tar, files_member_obj) as (dummy, files_obj): + files = json.loads(to_text(files_obj.read(), errors='surrogate_or_strict')) -def update_dep_map_collection_info(dep_map, existing_collections, collection_info, parent, requirement): - existing = [c for c in existing_collections if to_text(c) == to_text(collection_info)] - if existing and not collection_info.force: - # Test that the installed collection fits the requirement - existing[0].add_requirement(parent, requirement) - collection_info = existing[0] + _extract_tar_file(collection_tar, 'MANIFEST.json', b_collection_path, b_temp_path) + _extract_tar_file(collection_tar, 'FILES.json', b_collection_path, b_temp_path) - dep_map[to_text(collection_info)] = collection_info + for file_info in files['files']: + file_name = file_info['name'] + if file_name == '.': + continue + if file_info['ftype'] == 'file': + _extract_tar_file(collection_tar, file_name, b_collection_path, b_temp_path, + expected_hash=file_info['chksum_sha256']) -def parse_scm(collection, version): - if ',' in collection: - collection, version = collection.split(',', 1) - elif version == '*' or not version: - version = 'HEAD' + else: + _extract_tar_dir(collection_tar, file_name, b_collection_path) - if collection.startswith('git+'): - path = collection[4:] - else: - path = collection + except Exception: + # Ensure we don't leave the dir behind in case of a failure. + shutil.rmtree(b_collection_path) - path, fragment = urldefrag(path) - fragment = fragment.strip(os.path.sep) + b_namespace_path = os.path.dirname(b_collection_path) + if not os.listdir(b_namespace_path): + os.rmdir(b_namespace_path) - if path.endswith(os.path.sep + '.git'): - name = path.split(os.path.sep)[-2] - elif '://' not in path and '@' not in path: - name = path - else: - name = path.split('/')[-1] - if name.endswith('.git'): - name = name[:-4] + raise - return name, version, path, fragment +def install_src( + collection, + b_collection_path, b_collection_output_path, + artifacts_manager, +): + r"""Install the collection from source control into given dir. -def _download_file(url, b_path, expected_hash, validate_certs, headers=None): - urlsplit = os.path.splitext(to_text(url.rsplit('/', 1)[1])) - b_file_name = to_bytes(urlsplit[0], errors='surrogate_or_strict') - b_file_ext = to_bytes(urlsplit[1], errors='surrogate_or_strict') - b_file_path = tempfile.NamedTemporaryFile(dir=b_path, prefix=b_file_name, suffix=b_file_ext, delete=False).name + Generates the Ansible collection artifact data from a galaxy.yml and + installs the artifact to a directory. + This should follow the same pattern as build_collection, but instead + of creating an artifact, install it. - display.display("Downloading %s to %s" % (url, to_text(b_path))) - # Galaxy redirs downloads to S3 which reject the request if an Authorization header is attached so don't redir that - resp = open_url(to_native(url, errors='surrogate_or_strict'), validate_certs=validate_certs, headers=headers, - unredirected_headers=['Authorization'], http_agent=user_agent()) + :param collection: Collection to be installed. + :param b_collection_path: Collection dirs layout path. + :param b_collection_output_path: The installation directory for the \ + collection artifact. + :param artifacts_manager: Artifacts manager. 
- with open(b_file_path, 'wb') as download_file: - actual_hash = _consume_file(resp, download_file) + :raises AnsibleError: If no collection metadata found. + """ + collection_meta = artifacts_manager.get_direct_collection_meta(collection) + + if 'build_ignore' not in collection_meta: # installed collection, not src + # FIXME: optimize this? use a different process? copy instead of build? + collection_meta['build_ignore'] = [] + collection_manifest = _build_manifest(**collection_meta) + file_manifest = _build_files_manifest( + b_collection_path, + collection_meta['namespace'], collection_meta['name'], + collection_meta['build_ignore'], + ) - if expected_hash: - display.vvvv("Validating downloaded file hash %s with expected hash %s" % (actual_hash, expected_hash)) - if expected_hash != actual_hash: - raise AnsibleError("Mismatch artifact hash with downloaded file") + collection_output_path = _build_collection_dir( + b_collection_path, b_collection_output_path, + collection_manifest, file_manifest, + ) - return b_file_path + display.display( + 'Created collection for {coll!s} at {path!s}'. + format(coll=collection, path=collection_output_path) + ) def _extract_tar_dir(tar, dirname, b_dest): @@ -1536,25 +1274,47 @@ def _is_child_path(path, parent_path, link_name=None): return b_path == b_parent_path or b_path.startswith(b_parent_path + to_bytes(os.path.sep)) -def _consume_file(read_from, write_to=None): - bufsize = 65536 - sha256_digest = sha256() - data = read_from.read(bufsize) - while data: - if write_to is not None: - write_to.write(data) - write_to.flush() - sha256_digest.update(data) - data = read_from.read(bufsize) - - return sha256_digest.hexdigest() - - -def get_galaxy_metadata_path(b_path): - b_default_path = os.path.join(b_path, b'galaxy.yml') - candidate_names = [b'galaxy.yml', b'galaxy.yaml'] - for b_name in candidate_names: - b_path = os.path.join(b_path, b_name) - if os.path.exists(b_path): - return b_path - return b_default_path +def _resolve_depenency_map( + requested_requirements, # type: Iterable[Requirement] + galaxy_apis, # type: Iterable[GalaxyAPI] + concrete_artifacts_manager, # type: ConcreteArtifactsManager + preferred_candidates, # type: Optional[Iterable[Candidate]] + no_deps, # type: bool + allow_pre_release, # type: bool +): # type: (...) -> Dict[str, Candidate] + """Return the resolved dependency map.""" + collection_dep_resolver = build_collection_dependency_resolver( + galaxy_apis=galaxy_apis, + concrete_artifacts_manager=concrete_artifacts_manager, + preferred_candidates=preferred_candidates, + with_deps=not no_deps, + with_pre_releases=allow_pre_release, + ) + try: + return collection_dep_resolver.resolve( + requested_requirements, + max_rounds=2000000, # NOTE: same constant pip uses + ).mapping + except CollectionDependencyResolutionImpossible as dep_exc: + conflict_causes = ( + '* {req.fqcn!s}:{req.ver!s} ({dep_origin!s})'.format( + req=req_inf.requirement, + dep_origin='direct request' + if req_inf.parent is None + else 'dependency of {parent!s}'. + format(parent=req_inf.parent), + ) + for req_inf in dep_exc.causes + ) + error_msg_lines = chain( + ( + 'Failed to resolve the requested ' + 'dependencies map. 
Could not satisfy the following '
+            'requirements:',
+        ),
+        conflict_causes,
+    )
+    raise raise_from(  # NOTE: Leading "raise" is a hack for mypy bug #9717
+        AnsibleError('\n'.join(error_msg_lines)),
+        dep_exc,
+    )
diff --git a/lib/ansible/galaxy/collection/concrete_artifact_manager.py b/lib/ansible/galaxy/collection/concrete_artifact_manager.py
new file mode 100644
index 0000000000..33f5129dc7
--- /dev/null
+++ b/lib/ansible/galaxy/collection/concrete_artifact_manager.py
@@ -0,0 +1,646 @@
+# -*- coding: utf-8 -*-
+# Copyright: (c) 2020-2021, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+"""Concrete collection candidate management helper module."""
+
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import json
+import os
+import tarfile
+import subprocess
+from contextlib import contextmanager
+from hashlib import sha256
+from shutil import rmtree
+from tempfile import mkdtemp
+
+try:
+    from typing import TYPE_CHECKING
+except ImportError:
+    TYPE_CHECKING = False
+
+if TYPE_CHECKING:
+    from typing import (
+        Any,  # FIXME: !!!111
+        BinaryIO, Dict, IO,
+        Iterator, List, Optional,
+        Set, Tuple, Type, Union,
+    )
+
+    from ansible.galaxy.dependency_resolution.dataclasses import (
+        Candidate, Requirement,
+    )
+    from ansible.galaxy.token import GalaxyToken
+
+from ansible.errors import AnsibleError
+from ansible.galaxy import get_collections_galaxy_meta_info
+from ansible.galaxy.dependency_resolution.dataclasses import _GALAXY_YAML
+from ansible.galaxy.user_agent import user_agent
+from ansible.module_utils._text import to_bytes, to_native, to_text
+from ansible.module_utils.six.moves.urllib.error import URLError
+from ansible.module_utils.six.moves.urllib.parse import urldefrag
+from ansible.module_utils.six import raise_from
+from ansible.module_utils.urls import open_url
+from ansible.utils.display import Display
+
+import yaml
+
+
+display = Display()
+
+
+class ConcreteArtifactsManager:
+    """Manager for on-disk collection artifacts.
+
+    It is responsible for:
+        * downloading remote collections from Galaxy-compatible servers and
+          direct links to tarballs or SCM repositories
+        * keeping track of local ones
+        * keeping track of Galaxy API tokens for downloads from Galaxy-like
+          servers, as well as the artifact hashes
+        * caching all of the above
+        * retrieving the metadata out of the downloaded artifacts
+    """
+
+    def __init__(self, b_working_directory, validate_certs=True):
+        # type: (bytes, bool) -> None
+        """Initialize ConcreteArtifactsManager caches and constraints."""
+        self._validate_certs = validate_certs  # type: bool
+        self._artifact_cache = {}  # type: Dict[bytes, bytes]
+        self._galaxy_artifact_cache = {}  # type: Dict[Union[Candidate, Requirement], bytes]
+        self._artifact_meta_cache = {}  # type: Dict[bytes, Dict[str, Optional[Union[str, List[str], Dict[str, str]]]]]
+        self._galaxy_collection_cache = {}  # type: Dict[Union[Candidate, Requirement], Tuple[str, str, GalaxyToken]]
+        self._b_working_directory = b_working_directory  # type: bytes
+
+    def get_galaxy_artifact_path(self, collection):
+        # type: (Union[Candidate, Requirement]) -> bytes
+        """Given a Galaxy-stored collection, return a cached path.
+
+        If it's not yet on disk, this method downloads the artifact first.
+ """ + try: + return self._galaxy_artifact_cache[collection] + except KeyError: + pass + + try: + url, sha256_hash, token = self._galaxy_collection_cache[collection] + except KeyError as key_err: + raise_from( + RuntimeError( + 'The is no known source for {coll!s}'. + format(coll=collection), + ), + key_err, + ) + + display.vvvv( + "Fetching a collection tarball for '{collection!s}' from " + 'Ansible Galaxy'.format(collection=collection), + ) + + try: + b_artifact_path = _download_file( + url, + self._b_working_directory, + expected_hash=sha256_hash, + validate_certs=self._validate_certs, + token=token, + ) # type: bytes + except URLError as err: + raise_from( + AnsibleError( + 'Failed to download collection tar ' + "from '{coll_src!s}': {download_err!s}". + format( + coll_src=to_native(collection.src), + download_err=to_native(err), + ), + ), + err, + ) + else: + display.vvv( + "Collection '{coll!s}' obtained from " + 'server {server!s} {url!s}'.format( + coll=collection, server=collection.src or 'Galaxy', + url=collection.src.api_server if collection.src is not None + else '', + ) + ) + + self._galaxy_artifact_cache[collection] = b_artifact_path + return b_artifact_path + + def get_artifact_path(self, collection): + # type: (Union[Candidate, Requirement]) -> bytes + """Given a concrete collection pointer, return a cached path. + + If it's not yet on disk, this method downloads the artifact first. + """ + try: + return self._artifact_cache[collection.src] + except KeyError: + pass + + # NOTE: SCM needs to be special-cased as it may contain either + # NOTE: one collection in its root, or a number of top-level + # NOTE: collection directories instead. + # NOTE: The idea is to store the SCM collection as unpacked + # NOTE: directory structure under the temporary location and use + # NOTE: a "virtual" collection that has pinned requirements on + # NOTE: the directories under that SCM checkout that correspond + # NOTE: to collections. + # NOTE: This brings us to the idea that we need two separate + # NOTE: virtual Requirement/Candidate types -- + # NOTE: (single) dir + (multidir) subdirs + if collection.is_url: + display.vvvv( + "Collection requirement '{collection!s}' is a URL " + 'to a tar artifact'.format(collection=collection.fqcn), + ) + try: + b_artifact_path = _download_file( + collection.src, + self._b_working_directory, + expected_hash=None, # NOTE: URLs don't support checksums + validate_certs=self._validate_certs, + ) + except URLError as err: + raise_from( + AnsibleError( + 'Failed to download collection tar ' + "from '{coll_src!s}': {download_err!s}". + format( + coll_src=to_native(collection.src), + download_err=to_native(err), + ), + ), + err, + ) + elif collection.is_scm: + b_artifact_path = _extract_collection_from_git( + collection.src, + collection.ver, + self._b_working_directory, + ) + elif collection.is_file or collection.is_dir or collection.is_subdirs: + b_artifact_path = to_bytes(collection.src) + else: + # NOTE: This may happen `if collection.is_online_index_pointer` + raise RuntimeError( + 'The artifact is of an unexpected type {art_type!s}'. 
+ format(art_type=collection.type) + ) + + self._artifact_cache[collection.src] = b_artifact_path + return b_artifact_path + + def _get_direct_collection_namespace(self, collection): + # type: (Candidate) -> Optional[str] + return self.get_direct_collection_meta(collection)['namespace'] # type: ignore[return-value] + + def _get_direct_collection_name(self, collection): + # type: (Candidate) -> Optional[str] + return self.get_direct_collection_meta(collection)['name'] # type: ignore[return-value] + + def get_direct_collection_fqcn(self, collection): + # type: (Candidate) -> Optional[str] + """Extract FQCN from the given on-disk collection artifact. + + If the collection is virtual, ``None`` is returned instead + of a string. + """ + if collection.is_virtual: + # NOTE: should it be something like "<virtual>"? + return None + + return '.'.join(( # type: ignore[type-var] + self._get_direct_collection_namespace(collection), # type: ignore[arg-type] + self._get_direct_collection_name(collection), + )) + + def get_direct_collection_version(self, collection): + # type: (Union[Candidate, Requirement]) -> str + """Extract version from the given on-disk collection artifact.""" + return self.get_direct_collection_meta(collection)['version'] # type: ignore[return-value] + + def get_direct_collection_dependencies(self, collection): + # type: (Union[Candidate, Requirement]) -> Dict[str, str] + """Extract deps from the given on-disk collection artifact.""" + return self.get_direct_collection_meta(collection)['dependencies'] # type: ignore[return-value] + + def get_direct_collection_meta(self, collection): + # type: (Union[Candidate, Requirement]) -> Dict[str, Optional[Union[str, Dict[str, str], List[str]]]] + """Extract meta from the given on-disk collection artifact.""" + try: # FIXME: use unique collection identifier as a cache key? + return self._artifact_meta_cache[collection.src] + except KeyError: + b_artifact_path = self.get_artifact_path(collection) + + if collection.is_url or collection.is_file: + collection_meta = _get_meta_from_tar(b_artifact_path) + elif collection.is_dir: # should we just build a coll instead? + # FIXME: what if there's subdirs? + try: + collection_meta = _get_meta_from_dir(b_artifact_path) + except LookupError as lookup_err: + raise_from( + AnsibleError( + 'Failed to find the collection dir deps: {err!s}'. + format(err=to_native(lookup_err)), + ), + lookup_err, + ) + elif collection.is_scm: + collection_meta = { + 'name': None, + 'namespace': None, + 'dependencies': {to_native(b_artifact_path): '*'}, + 'version': '*', + } + elif collection.is_subdirs: + collection_meta = { + 'name': None, + 'namespace': None, + # NOTE: Dropping b_artifact_path since it's based on src anyway + 'dependencies': dict.fromkeys( + map(to_native, collection.namespace_collection_paths), + '*', + ), + 'version': '*', + } + else: + raise RuntimeError + + self._artifact_meta_cache[collection.src] = collection_meta + return collection_meta + + def save_collection_source(self, collection, url, sha256_hash, token): + # type: (Candidate, str, str, GalaxyToken) -> None + """Store collection URL, SHA256 hash and Galaxy API token. + + This is a hook that is supposed to be called before attempting to + download Galaxy-based collections with ``get_galaxy_artifact_path()``. 
+ """ + self._galaxy_collection_cache[collection] = url, sha256_hash, token + + @classmethod + @contextmanager + def under_tmpdir( + cls, # type: Type[ConcreteArtifactsManager] + temp_dir_base, # type: str + validate_certs=True, # type: bool + ): # type: (...) -> Iterator[ConcreteArtifactsManager] + """Custom ConcreteArtifactsManager constructor with temp dir. + + This method returns a context manager that allocates and cleans + up a temporary directory for caching the collection artifacts + during the dependency resolution process. + """ + # NOTE: Can't use `with tempfile.TemporaryDirectory:` + # NOTE: because it's not in Python 2 stdlib. + temp_path = mkdtemp( + dir=to_bytes(temp_dir_base, errors='surrogate_or_strict'), + ) + b_temp_path = to_bytes(temp_path, errors='surrogate_or_strict') + try: + yield cls(b_temp_path, validate_certs) + finally: + rmtree(b_temp_path) + + +def parse_scm(collection, version): + """Extract name, version, path and subdir out of the SCM pointer.""" + if ',' in collection: + collection, version = collection.split(',', 1) + elif version == '*' or not version: + version = 'HEAD' + + if collection.startswith('git+'): + path = collection[4:] + else: + path = collection + + path, fragment = urldefrag(path) + fragment = fragment.strip(os.path.sep) + + if path.endswith(os.path.sep + '.git'): + name = path.split(os.path.sep)[-2] + elif '://' not in path and '@' not in path: + name = path + else: + name = path.split('/')[-1] + if name.endswith('.git'): + name = name[:-4] + + return name, version, path, fragment + + +def _extract_collection_from_git(repo_url, coll_ver, b_path): + name, version, git_url, fragment = parse_scm(repo_url, coll_ver) + b_checkout_path = mkdtemp( + dir=b_path, + prefix=to_bytes(name, errors='surrogate_or_strict'), + ) # type: bytes + git_clone_cmd = 'git', 'clone', git_url, to_text(b_checkout_path) + # FIXME: '--depth', '1', '--branch', version + try: + subprocess.check_call(git_clone_cmd) + except subprocess.CalledProcessError as proc_err: + raise_from( + AnsibleError( # should probably be LookupError + 'Failed to clone a Git repository from `{repo_url!s}`.'. + format(repo_url=to_native(git_url)), + ), + proc_err, + ) + + git_switch_cmd = 'git', 'checkout', to_text(version) + try: + subprocess.check_call(git_switch_cmd, cwd=b_checkout_path) + except subprocess.CalledProcessError as proc_err: + raise_from( + AnsibleError( # should probably be LookupError + 'Failed to switch a cloned Git repo `{repo_url!s}` ' + 'to the requested revision `{commitish!s}`.'. 
+ format( + commitish=to_native(version), + repo_url=to_native(git_url), + ), + ), + proc_err, + ) + + return ( + os.path.join(b_checkout_path, to_bytes(fragment)) + if fragment else b_checkout_path + ) + + +# FIXME: use random subdirs while preserving the file names +def _download_file(url, b_path, expected_hash, validate_certs, token=None): + # type: (str, bytes, Optional[str], bool, GalaxyToken) -> bytes + # ^ NOTE: used in download and verify_collections ^ + b_tarball_name = to_bytes( + url.rsplit('/', 1)[1], errors='surrogate_or_strict', + ) + b_file_name = b_tarball_name[:-len('.tar.gz')] + + b_tarball_dir = mkdtemp( + dir=b_path, + prefix=b'-'.join((b_file_name, b'')), + ) # type: bytes + + b_file_path = os.path.join(b_tarball_dir, b_tarball_name) + + display.display("Downloading %s to %s" % (url, to_text(b_tarball_dir))) + # NOTE: Galaxy redirects downloads to S3 which rejects the request + # NOTE: if an Authorization header is attached so don't redirect it + resp = open_url( + to_native(url, errors='surrogate_or_strict'), + validate_certs=validate_certs, + headers=None if token is None else token.headers(), + unredirected_headers=['Authorization'], http_agent=user_agent(), + ) + + with open(b_file_path, 'wb') as download_file: # type: BinaryIO + actual_hash = _consume_file(resp, write_to=download_file) + + if expected_hash: + display.vvvv( + 'Validating downloaded file hash {actual_hash!s} with ' + 'expected hash {expected_hash!s}'. + format(actual_hash=actual_hash, expected_hash=expected_hash) + ) + if expected_hash != actual_hash: + raise AnsibleError('Mismatch artifact hash with downloaded file') + + return b_file_path + + +def _consume_file(read_from, write_to=None): + # type: (BinaryIO, BinaryIO) -> str + bufsize = 65536 + sha256_digest = sha256() + data = read_from.read(bufsize) + while data: + if write_to is not None: + write_to.write(data) + write_to.flush() + sha256_digest.update(data) + data = read_from.read(bufsize) + + return sha256_digest.hexdigest() + + +def _normalize_galaxy_yml_manifest( + galaxy_yml, # type: Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] + b_galaxy_yml_path, # type: bytes +): + # type: (...) 
-> Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] + galaxy_yml_schema = ( + get_collections_galaxy_meta_info() + ) # type: List[Dict[str, Any]] # FIXME: <-- + # FIXME: 👆maybe precise type: List[Dict[str, Union[bool, str, List[str]]]] + + mandatory_keys = set() + string_keys = set() # type: Set[str] + list_keys = set() # type: Set[str] + dict_keys = set() # type: Set[str] + + for info in galaxy_yml_schema: + if info.get('required', False): + mandatory_keys.add(info['key']) + + key_list_type = { + 'str': string_keys, + 'list': list_keys, + 'dict': dict_keys, + }[info.get('type', 'str')] + key_list_type.add(info['key']) + + all_keys = frozenset(list(mandatory_keys) + list(string_keys) + list(list_keys) + list(dict_keys)) + + set_keys = set(galaxy_yml.keys()) + missing_keys = mandatory_keys.difference(set_keys) + if missing_keys: + raise AnsibleError("The collection galaxy.yml at '%s' is missing the following mandatory keys: %s" + % (to_native(b_galaxy_yml_path), ", ".join(sorted(missing_keys)))) + + extra_keys = set_keys.difference(all_keys) + if len(extra_keys) > 0: + display.warning("Found unknown keys in collection galaxy.yml at '%s': %s" + % (to_text(b_galaxy_yml_path), ", ".join(extra_keys))) + + # Add the defaults if they have not been set + for optional_string in string_keys: + if optional_string not in galaxy_yml: + galaxy_yml[optional_string] = None + + for optional_list in list_keys: + list_val = galaxy_yml.get(optional_list, None) + + if list_val is None: + galaxy_yml[optional_list] = [] + elif not isinstance(list_val, list): + galaxy_yml[optional_list] = [list_val] # type: ignore[list-item] + + for optional_dict in dict_keys: + if optional_dict not in galaxy_yml: + galaxy_yml[optional_dict] = {} + + # NOTE: `version: null` is only allowed for `galaxy.yml` + # NOTE: and not `MANIFEST.json`. The use-case for it is collections + # NOTE: that generate the version from Git before building a + # NOTE: distributable tarball artifact. + if not galaxy_yml.get('version'): + galaxy_yml['version'] = '*' + + return galaxy_yml + + +def _get_meta_from_dir( + b_path, # type: bytes +): # type: (...) -> Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] + try: + return _get_meta_from_installed_dir(b_path) + except LookupError: + return _get_meta_from_src_dir(b_path) + + +def _get_meta_from_src_dir( + b_path, # type: bytes +): # type: (...) -> Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] + galaxy_yml = os.path.join(b_path, _GALAXY_YAML) + if not os.path.isfile(galaxy_yml): + raise LookupError( + "The collection galaxy.yml path '{path!s}' does not exist.". + format(path=to_native(galaxy_yml)) + ) + + with open(galaxy_yml, 'rb') as manifest_file_obj: + try: + manifest = yaml.safe_load(manifest_file_obj) + except yaml.error.YAMLError as yaml_err: + raise_from( + AnsibleError( + "Failed to parse the galaxy.yml at '{path!s}' with " + 'the following error:\n{err_txt!s}'. + format( + path=to_native(galaxy_yml), + err_txt=to_native(yaml_err), + ), + ), + yaml_err, + ) + + return _normalize_galaxy_yml_manifest(manifest, galaxy_yml) + + +def _get_meta_from_installed_dir( + b_path, # type: bytes +): # type: (...) 
-> Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] + n_manifest_json = 'MANIFEST.json' + b_manifest_json = to_bytes(n_manifest_json) + b_manifest_json_path = os.path.join(b_path, b_manifest_json) + + try: + with open(b_manifest_json_path, 'rb') as manifest_fd: + b_manifest_txt = manifest_fd.read() + except (IOError, OSError): + raise LookupError( + "The collection {manifest!s} path '{path!s}' does not exist.". + format( + manifest=n_manifest_json, + path=to_native(b_manifest_json_path), + ) + ) + + manifest_txt = to_text(b_manifest_txt, errors='surrogate_or_strict') + + try: + manifest = json.loads(manifest_txt) + except ValueError: + raise AnsibleError( + 'Collection tar file member {member!s} does not ' + 'contain a valid json string.'. + format(member=n_manifest_json), + ) + else: + collection_info = manifest['collection_info'] + + version = collection_info.get('version') + if not version: + raise AnsibleError( + u'Collection metadata file at `{meta_file!s}` is expected ' + u'to have a valid SemVer version value but got {version!s}'. + format( + meta_file=to_text(b_manifest_json_path), + version=to_text(repr(version)), + ), + ) + + return collection_info + + +def _get_meta_from_tar( + b_path, # type: bytes +): # type: (...) -> Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] + if not tarfile.is_tarfile(b_path): + raise AnsibleError( + "Collection artifact at '{path!s}' is not a valid tar file.". + format(path=to_native(b_path)), + ) + + n_manifest_json = 'MANIFEST.json' + + with tarfile.open(b_path, mode='r') as collection_tar: # type: tarfile.TarFile + try: + member = collection_tar.getmember(n_manifest_json) + except KeyError: + raise AnsibleError( + "Collection at '{path!s}' does not contain the " + 'required file {manifest_file!s}.'. + format( + path=to_native(b_path), + manifest_file=n_manifest_json, + ), + ) + + with _tarfile_extract(collection_tar, member) as (_member, member_obj): + if member_obj is None: + raise AnsibleError( + 'Collection tar file does not contain ' + 'member {member!s}'.format(member=n_manifest_json), + ) + + text_content = to_text( + member_obj.read(), + errors='surrogate_or_strict', + ) + + try: + manifest = json.loads(text_content) + except ValueError: + raise AnsibleError( + 'Collection tar file member {member!s} does not ' + 'contain a valid json string.'. + format(member=n_manifest_json), + ) + return manifest['collection_info'] + + +@contextmanager +def _tarfile_extract( + tar, # type: tarfile.TarFile + member, # type: tarfile.TarInfo +): + # type: (...) 
-> Iterator[Tuple[tarfile.TarInfo, Optional[IO[bytes]]]] + tar_obj = tar.extractfile(member) + try: + yield member, tar_obj + finally: + if tar_obj is not None: + tar_obj.close() diff --git a/lib/ansible/galaxy/collection/galaxy_api_proxy.py b/lib/ansible/galaxy/collection/galaxy_api_proxy.py new file mode 100644 index 0000000000..fb4cd5de02 --- /dev/null +++ b/lib/ansible/galaxy/collection/galaxy_api_proxy.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +# Copyright: (c) 2020-2021, Ansible Project +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) +"""A facade for interfacing with multiple Galaxy instances.""" + +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import os + +try: + from typing import TYPE_CHECKING +except ImportError: + TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Dict, Iterable, Tuple + from ansible.galaxy.api import CollectionVersionMetadata + from ansible.galaxy.collection.concrete_artifact_manager import ( + ConcreteArtifactsManager, + ) + from ansible.galaxy.dependency_resolution.dataclasses import ( + Candidate, Requirement, + ) + +from ansible.galaxy.api import GalaxyAPI, GalaxyError + + +class MultiGalaxyAPIProxy: + """A proxy that abstracts talking to multiple Galaxy instances.""" + + def __init__(self, apis, concrete_artifacts_manager): + # type: (Iterable[GalaxyAPI], ConcreteArtifactsManager) -> None + """Initialize the target APIs list.""" + self._apis = apis + self._concrete_art_mgr = concrete_artifacts_manager + + def get_collection_versions(self, requirement): + # type: (Requirement) -> Iterable[Tuple[str, GalaxyAPI]] + """Get a set of unique versions for FQCN on Galaxy servers.""" + if requirement.is_concrete_artifact: + return { + ( + self._concrete_art_mgr. + get_direct_collection_version(requirement), + requirement.src, + ), + } + + api_lookup_order = ( + (requirement.src, ) + if isinstance(requirement.src, GalaxyAPI) + else self._apis + ) + return set( + (version, api) + for api in api_lookup_order + for version in api.get_collection_versions( + requirement.namespace, requirement.name, + ) + ) + + def get_collection_version_metadata(self, collection_candidate): + # type: (Candidate) -> CollectionVersionMetadata + """Retrieve collection metadata of a given candidate.""" + + api_lookup_order = ( + (collection_candidate.src, ) + if isinstance(collection_candidate.src, GalaxyAPI) + else self._apis + ) + for api in api_lookup_order: + try: + version_metadata = api.get_collection_version_metadata( + collection_candidate.namespace, + collection_candidate.name, + collection_candidate.ver, + ) + except GalaxyError as api_err: + last_err = api_err + else: + self._concrete_art_mgr.save_collection_source( + collection_candidate, + version_metadata.download_url, + version_metadata.artifact_sha256, + api.token, + ) + return version_metadata + + raise last_err + + def get_collection_dependencies(self, collection_candidate): + # type: (Candidate) -> Dict[str, str] + # FIXME: return Requirement instances instead? + """Retrieve collection dependencies of a given candidate.""" + if collection_candidate.is_concrete_artifact: + return ( + self. + _concrete_art_mgr. + get_direct_collection_dependencies + )(collection_candidate) + + return ( + self. + get_collection_version_metadata(collection_candidate). 
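#
# [Editor's aside: usage sketch, not part of the patch] The proxy fans a
# version lookup out across every configured server and tags each hit with
# the API it came from (`api_main`, `api_mirror` and `mgr` are assumed to be
# preconfigured GalaxyAPI/manager objects):
#
#     proxy = MultiGalaxyAPIProxy([api_main, api_mirror], mgr)
#     req = Requirement('my_ns.my_coll', '*', None, 'galaxy')
#     for version, api in proxy.get_collection_versions(req):
#         print(version, api.api_server)
#
# A requirement whose `src` is a single GalaxyAPI short-circuits the lookup
# order to just that server.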
diff --git a/lib/ansible/galaxy/dependency_resolution/__init__.py b/lib/ansible/galaxy/dependency_resolution/__init__.py
index 7d0020691b..71b895ba3d 100644
--- a/lib/ansible/galaxy/dependency_resolution/__init__.py
+++ b/lib/ansible/galaxy/dependency_resolution/__init__.py
@@ -1,7 +1,49 @@
 # -*- coding: utf-8 -*-
-# Copyright: (c) 2020, Ansible Project
+# Copyright: (c) 2020-2021, Ansible Project
 # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
 """Dependency resolution machinery."""
 
 from __future__ import (absolute_import, division, print_function)
 __metaclass__ = type
+
+try:
+    from typing import TYPE_CHECKING
+except ImportError:
+    TYPE_CHECKING = False
+
+if TYPE_CHECKING:
+    from typing import Iterable
+    from ansible.galaxy.api import GalaxyAPI
+    from ansible.galaxy.collection.concrete_artifact_manager import (
+        ConcreteArtifactsManager,
+    )
+    from ansible.galaxy.dependency_resolution.dataclasses import Candidate
+
+from ansible.galaxy.collection.galaxy_api_proxy import MultiGalaxyAPIProxy
+from ansible.galaxy.dependency_resolution.providers import CollectionDependencyProvider
+from ansible.galaxy.dependency_resolution.reporters import CollectionDependencyReporter
+from ansible.galaxy.dependency_resolution.resolvers import CollectionDependencyResolver
+
+
+def build_collection_dependency_resolver(
+        galaxy_apis,  # type: Iterable[GalaxyAPI]
+        concrete_artifacts_manager,  # type: ConcreteArtifactsManager
+        preferred_candidates=None,  # type: Iterable[Candidate]
+        with_deps=True,  # type: bool
+        with_pre_releases=False,  # type: bool
+):  # type: (...) -> CollectionDependencyResolver
+    """Return a collection dependency resolver.
+
+    The returned instance will have a ``resolve()`` method for
+    further consumption.
+    """
+    return CollectionDependencyResolver(
+        CollectionDependencyProvider(
+            apis=MultiGalaxyAPIProxy(galaxy_apis, concrete_artifacts_manager),
+            concrete_artifacts_manager=concrete_artifacts_manager,
+            preferred_candidates=preferred_candidates,
+            with_deps=with_deps,
+            with_pre_releases=with_pre_releases,
+        ),
+        CollectionDependencyReporter(),
+    )
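
Since ``CollectionDependencyResolver`` is just a thin wrapper over resolvelib's ``Resolver``, the object returned here should behave like any resolvelib resolver: calling ``resolve()`` yields a result whose ``mapping`` pins one candidate per identifier. A hedged usage sketch, where ``apis``, ``cam`` and ``requirements`` stand in for real objects and are not defined by this patch:

    # Hypothetical wiring, not part of this patch:
    resolver = build_collection_dependency_resolver(
        galaxy_apis=apis,                # Iterable[GalaxyAPI]
        concrete_artifacts_manager=cam,  # ConcreteArtifactsManager
    )
    result = resolver.resolve(requirements)  # Iterable[Requirement]
    for fqcn, candidate in result.mapping.items():
        print(fqcn, candidate.ver)
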
+ """ + return CollectionDependencyResolver( + CollectionDependencyProvider( + apis=MultiGalaxyAPIProxy(galaxy_apis, concrete_artifacts_manager), + concrete_artifacts_manager=concrete_artifacts_manager, + preferred_candidates=preferred_candidates, + with_deps=with_deps, + with_pre_releases=with_pre_releases, + ), + CollectionDependencyReporter(), + ) diff --git a/lib/ansible/galaxy/dependency_resolution/dataclasses.py b/lib/ansible/galaxy/dependency_resolution/dataclasses.py new file mode 100644 index 0000000000..bea5dacc96 --- /dev/null +++ b/lib/ansible/galaxy/dependency_resolution/dataclasses.py @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- +# Copyright: (c) 2020-2021, Ansible Project +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) +"""Dependency structs.""" +# FIXME: add caching all over the place + +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import json +import os +from collections import namedtuple +from glob import iglob +from keyword import iskeyword # used in _is_fqcn + +try: + from typing import TYPE_CHECKING +except ImportError: + TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Tuple, Type, TypeVar + from ansible.galaxy.collection.concrete_artifact_manager import ( + ConcreteArtifactsManager, + ) + Collection = TypeVar( + 'Collection', + 'Candidate', 'Requirement', + '_ComputedReqKindsMixin', + ) + +import yaml + +from ansible.errors import AnsibleError +from ansible.galaxy.api import GalaxyAPI +from ansible.module_utils._text import to_bytes, to_native, to_text +from ansible.module_utils.six.moves.urllib.parse import urlparse +from ansible.module_utils.six import raise_from +from ansible.utils.display import Display + + +try: # NOTE: py3/py2 compat + # FIXME: put somewhere into compat + # py2 mypy can't deal with try/excepts + _is_py_id = str.isidentifier # type: ignore[attr-defined] +except AttributeError: # Python 2 + # FIXME: port this to AnsibleCollectionRef.is_valid_collection_name + from re import match as _match_pattern + from tokenize import Name as _VALID_IDENTIFIER_REGEX + _valid_identifier_string_regex = ''.join((_VALID_IDENTIFIER_REGEX, r'\Z')) + + def _is_py_id(tested_str): + # Ref: https://stackoverflow.com/a/55802320/595220 + return bool(_match_pattern(_valid_identifier_string_regex, tested_str)) + + +_ALLOW_CONCRETE_POINTER_IN_SOURCE = False # NOTE: This is a feature flag +_GALAXY_YAML = b'galaxy.yml' +_MANIFEST_JSON = b'MANIFEST.json' + + +display = Display() + + +def _is_collection_src_dir(dir_path): + b_dir_path = to_bytes(dir_path, errors='surrogate_or_strict') + return os.path.isfile(os.path.join(b_dir_path, _GALAXY_YAML)) + + +def _is_installed_collection_dir(dir_path): + b_dir_path = to_bytes(dir_path, errors='surrogate_or_strict') + return os.path.isfile(os.path.join(b_dir_path, _MANIFEST_JSON)) + + +def _is_collection_dir(dir_path): + return ( + _is_installed_collection_dir(dir_path) or + _is_collection_src_dir(dir_path) + ) + + +def _find_collections_in_subdirs(dir_path): + b_dir_path = to_bytes(dir_path, errors='surrogate_or_strict') + galaxy_yml_glob_pattern = os.path.join( + b_dir_path, + # b'*', # namespace is supposed to be top-level per spec + b'*', # collection name + _GALAXY_YAML, + ) + return ( + os.path.dirname(galaxy_yml) + for galaxy_yml in iglob(galaxy_yml_glob_pattern) + ) + + +def _is_collection_namespace_dir(tested_str): + return any(_find_collections_in_subdirs(tested_str)) + + +def _is_file_path(tested_str): + return 
+    return os.path.isfile(to_bytes(tested_str, errors='surrogate_or_strict'))
+
+
+def _is_http_url(tested_str):
+    return urlparse(tested_str).scheme.lower() in {'http', 'https'}
+
+
+def _is_git_url(tested_str):
+    return tested_str.startswith(('git+', 'git@'))
+
+
+def _is_concrete_artifact_pointer(tested_str):
+    return any(
+        predicate(tested_str)
+        for predicate in (
+            # NOTE: Maintain the checks to be sorted from light to heavy:
+            _is_git_url,
+            _is_http_url,
+            _is_file_path,
+            _is_collection_dir,
+            _is_collection_namespace_dir,
+        )
+    )
+
+
+def _is_fqcn(tested_str):
+    # FIXME: port this to AnsibleCollectionRef.is_valid_collection_name
+    if tested_str.count('.') != 1:
+        return False
+
+    return all(
+        # FIXME: keywords and identifiers are different in different Pythons
+        not iskeyword(ns_or_name) and _is_py_id(ns_or_name)
+        for ns_or_name in tested_str.split('.')
+    )
+
+
+class _ComputedReqKindsMixin:
+
+    @classmethod
+    def from_dir_path_as_unknown(  # type: ignore[misc]
+            cls,  # type: Type[Collection]
+            dir_path,  # type: bytes
+            art_mgr,  # type: ConcreteArtifactsManager
+    ):  # type: (...) -> Collection
+        """Make collection from an unspecified dir type.
+
+        This alternative constructor attempts to grab metadata from the
+        given path if it's a directory. If there's no metadata, it
+        falls back to guessing the FQCN based on the directory path and
+        sets the version to "*".
+
+        It raises a ValueError immediately if the input is not an
+        existing directory path.
+        """
+        if not os.path.isdir(dir_path):
+            raise ValueError(
+                "The collection directory '{path!s}' doesn't exist".
+                format(path=to_native(dir_path)),
+            )
+
+        try:
+            return cls.from_dir_path(dir_path, art_mgr)
+        except ValueError:
+            return cls.from_dir_path_implicit(dir_path)
+
+    @classmethod
+    def from_dir_path(cls, dir_path, art_mgr):
+        """Make collection from a directory with metadata."""
+        b_dir_path = to_bytes(dir_path, errors='surrogate_or_strict')
+        if not _is_collection_dir(b_dir_path):
+            display.warning(
+                u"Collection at '{path!s}' does not have a {manifest_json!s} "
+                u'file, nor a {galaxy_yml!s} file: cannot detect version.'.
+                format(
+                    galaxy_yml=to_text(_GALAXY_YAML),
+                    manifest_json=to_text(_MANIFEST_JSON),
+                    path=to_text(dir_path, errors='surrogate_or_strict'),
+                ),
+            )
+            raise ValueError(
+                '`dir_path` argument must be an installed or a source'
+                ' collection directory.',
+            )
+
+        tmp_inst_req = cls(None, None, dir_path, 'dir')
+        req_name = art_mgr.get_direct_collection_fqcn(tmp_inst_req)
+        req_version = art_mgr.get_direct_collection_version(tmp_inst_req)
+
+        return cls(req_name, req_version, dir_path, 'dir')
+
+    @classmethod
+    def from_dir_path_implicit(  # type: ignore[misc]
+            cls,  # type: Type[Collection]
+            dir_path,  # type: bytes
+    ):  # type: (...) -> Collection
+        """Construct a collection instance based on an arbitrary dir.
+
+        This alternative constructor infers the FQCN based on the parent
+        and current directory names. It also sets the version to "*"
+        regardless of whether any of the known metadata files are present.
+        """
+        # There is no metadata, but it isn't required for a functional
+        # collection. Determine the namespace.name from the path.
+        u_dir_path = to_text(dir_path, errors='surrogate_or_strict')
+        path_list = u_dir_path.split(os.path.sep)
+        req_name = '.'.join(path_list[-2:])
+        return cls(req_name, '*', dir_path, 'dir')  # type: ignore[call-arg]
+
+    @classmethod
+    def from_string(cls, collection_input, artifacts_manager):
+        req = {}
+        if _is_concrete_artifact_pointer(collection_input):
+            # Arg is a file path or URL to a collection
+            req['name'] = collection_input
+        else:
+            req['name'], _sep, req['version'] = collection_input.partition(':')
+            if not req['version']:
+                del req['version']
+
+        return cls.from_requirement_dict(req, artifacts_manager)
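
The string form handled by ``from_string()`` is essentially ``<fqcn>[:<version-spec>]``, with concrete pointers (paths, URLs, SCM specs) bypassing the version split entirely. The core parsing reduces to a single ``partition`` call; a tiny self-contained sketch of just that branch (the function name is illustrative):

    def parse_requirement_string(collection_input):
        # 'ns.coll:>=1.0' -> ('ns.coll', '>=1.0'); bare 'ns.coll' -> ('ns.coll', '*')
        name, _sep, version = collection_input.partition(':')
        return name, version or '*'

    assert parse_requirement_string('community.general') == ('community.general', '*')
    assert parse_requirement_string('community.general:3.0.0') == ('community.general', '3.0.0')
    assert parse_requirement_string('community.general:>=2.0,<4.0') == ('community.general', '>=2.0,<4.0')
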
+
+    @classmethod
+    def from_requirement_dict(cls, collection_req, art_mgr):
+        req_name = collection_req.get('name', None)
+        req_version = collection_req.get('version', '*')
+        req_type = collection_req.get('type')
+        # TODO: decide how to deprecate the old src API behavior
+        req_source = collection_req.get('source', None)
+
+        if req_type is None:
+            if (  # FIXME: decide on the future behavior:
+                    _ALLOW_CONCRETE_POINTER_IN_SOURCE
+                    and req_source is not None
+                    and _is_concrete_artifact_pointer(req_source)
+            ):
+                src_path = req_source
+            elif req_name is not None and _is_fqcn(req_name):
+                req_type = 'galaxy'
+            elif (
+                    req_name is not None
+                    and _is_concrete_artifact_pointer(req_name)
+            ):
+                src_path, req_name = req_name, None
+            else:
+                dir_tip_tmpl = (  # NOTE: leading LFs are for concat
+                    '\n\nTip: Make sure you are pointing to the right '
+                    'subdirectory — `{src!s}` looks like a directory '
+                    'but it is neither a collection, nor a namespace '
+                    'dir.'
+                )
+
+                if req_source is not None and os.path.isdir(req_source):
+                    tip = dir_tip_tmpl.format(src=req_source)
+                elif req_name is not None and os.path.isdir(req_name):
+                    tip = dir_tip_tmpl.format(src=req_name)
+                elif req_name:
+                    tip = '\n\nCould not find {0}.'.format(req_name)
+                else:
+                    tip = ''
+
+                raise AnsibleError(  # NOTE: I'd prefer a ValueError instead
+                    'Neither the collection requirement entry key '
+                    "'name', nor 'source' point to a concrete "
+                    "resolvable collection artifact. Also 'name' is "
+                    'not an FQCN. A valid collection name must be in '
+                    'the format <namespace>.<collection>. Please make '
+                    'sure that the namespace and the collection name '
+                    'contain characters from [a-zA-Z0-9_] only.'
+                    '{extra_tip!s}'.format(extra_tip=tip),
+                )
+
+        if req_type is None:
+            if _is_git_url(src_path):
+                req_type = 'git'
+                req_source = src_path
+            elif _is_http_url(src_path):
+                req_type = 'url'
+                req_source = src_path
+            elif _is_file_path(src_path):
+                req_type = 'file'
+                req_source = src_path
+            elif _is_collection_dir(src_path):
+                req_type = 'dir'
+                req_source = src_path
+            elif _is_collection_namespace_dir(src_path):
+                req_name = None  # No name for a virtual req or "namespace."?
+                req_type = 'subdirs'
+                req_source = src_path
+            else:
+                raise AnsibleError(  # NOTE: this is never supposed to be hit
+                    'Failed to automatically detect the collection '
+                    'requirement type.',
+                )
+
+        if req_type not in {'file', 'galaxy', 'git', 'url', 'dir', 'subdirs'}:
+            raise AnsibleError(
+                "The collection requirement entry key 'type' must be "
+                'one of file, galaxy, git, dir, subdirs, or url.'
+            )
+
+        if req_name is None and req_type == 'galaxy':
+            raise AnsibleError(
+                'A collection requirement entry should contain '
+                "the key 'name' if it's requested from a Galaxy-like "
+                'index server.',
+            )
+
+        if req_type != 'galaxy' and req_source is None:
+            req_source, req_name = req_name, None
+
+        if (
+                req_type == 'galaxy' and
+                isinstance(req_source, GalaxyAPI) and
+                not _is_http_url(req_source.api_server)
+        ):
+            raise AnsibleError(
+                "A collection requirement 'source' entry should contain "
+                'a valid Galaxy API URL but it does not: {not_url!s} '
+                'is not an HTTP URL.'.
+                format(not_url=req_source.api_server),
+            )
+
+        tmp_inst_req = cls(req_name, req_version, req_source, req_type)
+
+        if req_type not in {'galaxy', 'subdirs'} and req_name is None:
+            req_name = art_mgr.get_direct_collection_fqcn(tmp_inst_req)  # TODO: fix the cache key in artifacts manager?
+
+        if req_type not in {'galaxy', 'subdirs'} and req_version == '*':
+            req_version = art_mgr.get_direct_collection_version(tmp_inst_req)
+
+        return cls(
+            req_name, req_version,
+            req_source, req_type,
+        )
+
+    def __repr__(self):
+        return (
+            '<{self!s} of type {coll_type!r} from {src!s}>'.
+            format(self=self, coll_type=self.type, src=self.src or 'Galaxy')
+        )
+
+    def __str__(self):
+        return to_native(self.__unicode__())
+
+    def __unicode__(self):
+        if self.fqcn is None:
+            return (
+                u'"virtual collection Git repo"' if self.is_scm
+                else u'"virtual collection namespace"'
+            )
+
+        return (
+            u'{fqcn!s}:{ver!s}'.
+            format(fqcn=to_text(self.fqcn), ver=to_text(self.ver))
+        )
+
+    def _get_separate_ns_n_name(self):  # FIXME: use LRU cache
+        return self.fqcn.split('.')
+
+    @property
+    def namespace(self):
+        if self.is_virtual:
+            raise TypeError('Virtual collections do not have a namespace')
+
+        return self._get_separate_ns_n_name()[0]
+
+    @property
+    def name(self):
+        if self.is_virtual:
+            raise TypeError('Virtual collections do not have a name')
+
+        return self._get_separate_ns_n_name()[-1]
+
+    @property
+    def canonical_package_id(self):
+        if not self.is_virtual:
+            return to_native(self.fqcn)
+
+        return (
+            '<virtual namespace from {src!s} of type {src_type!s}>'.
+            format(src=to_native(self.src), src_type=to_native(self.type))
+        )
+
+    @property
+    def is_virtual(self):
+        return self.is_scm or self.is_subdirs
+
+    @property
+    def is_file(self):
+        return self.type == 'file'
+
+    @property
+    def is_dir(self):
+        return self.type == 'dir'
+
+    @property
+    def namespace_collection_paths(self):
+        return [
+            to_native(path)
+            for path in _find_collections_in_subdirs(self.src)
+        ]
+
+    @property
+    def is_subdirs(self):
+        return self.type == 'subdirs'
+
+    @property
+    def is_url(self):
+        return self.type == 'url'
+
+    @property
+    def is_scm(self):
+        return self.type == 'git'
+
+    @property
+    def is_concrete_artifact(self):
+        return self.type in {'git', 'url', 'file', 'dir', 'subdirs'}
+
+    @property
+    def is_online_index_pointer(self):
+        return not self.is_concrete_artifact
+
+
+class Requirement(
+        _ComputedReqKindsMixin,
+        namedtuple('Requirement', ('fqcn', 'ver', 'src', 'type')),
+):
+    """An abstract requirement request."""
+
+
+class Candidate(
+        _ComputedReqKindsMixin,
+        namedtuple('Candidate', ('fqcn', 'ver', 'src', 'type')),
+):
+    """A concrete collection candidate with its version resolved."""
diff --git a/lib/ansible/galaxy/dependency_resolution/errors.py b/lib/ansible/galaxy/dependency_resolution/errors.py
new file mode 100644
index 0000000000..e57bd06e57
--- /dev/null
+++ b/lib/ansible/galaxy/dependency_resolution/errors.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+# Copyright: (c) 2020-2021, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+"""Dependency resolution exceptions."""
+
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+from resolvelib.resolvers import (
+    ResolutionImpossible as CollectionDependencyResolutionImpossible,
+)
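
The sole export here re-badges resolvelib's ``ResolutionImpossible`` so that callers outside this package never import resolvelib directly. In resolvelib, that exception carries a ``causes`` list pairing each unsatisfiable requirement with its parent candidate; treating that as an assumption about the pinned resolvelib version, a caller might surface a failed resolution roughly like this (``resolver`` and ``requirements`` as in the earlier sketch):

    from ansible.galaxy.dependency_resolution.errors import (
        CollectionDependencyResolutionImpossible,
    )

    try:
        result = resolver.resolve(requirements)
    except CollectionDependencyResolutionImpossible as dep_exc:
        for cause in dep_exc.causes:
            # Each cause pairs the unsatisfiable requirement with the
            # candidate that demanded it (None for user-supplied roots).
            print('unsatisfiable:', cause.requirement, 'wanted by:', cause.parent)
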
diff --git a/lib/ansible/galaxy/dependency_resolution/providers.py b/lib/ansible/galaxy/dependency_resolution/providers.py
new file mode 100644
index 0000000000..c93fe433db
--- /dev/null
+++ b/lib/ansible/galaxy/dependency_resolution/providers.py
@@ -0,0 +1,273 @@
+# -*- coding: utf-8 -*-
+# Copyright: (c) 2020-2021, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+"""Requirement provider interfaces."""
+
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import functools
+
+try:
+    from typing import TYPE_CHECKING
+except ImportError:
+    TYPE_CHECKING = False
+
+if TYPE_CHECKING:
+    from typing import Iterable, List, NamedTuple, Optional, Union
+    from ansible.galaxy.collection.concrete_artifact_manager import (
+        ConcreteArtifactsManager,
+    )
+    from ansible.galaxy.collection.galaxy_api_proxy import MultiGalaxyAPIProxy
+
+from ansible.galaxy.dependency_resolution.dataclasses import (
+    Candidate,
+    Requirement,
+)
+from ansible.galaxy.dependency_resolution.versioning import (
+    is_pre_release,
+    meets_requirements,
+)
+from ansible.utils.version import SemanticVersion
+
+from resolvelib import AbstractProvider
+
+
+class CollectionDependencyProvider(AbstractProvider):
+    """Delegate providing a requirement interface for the resolver."""
+
+    def __init__(
+            self,  # type: CollectionDependencyProvider
+            apis,  # type: MultiGalaxyAPIProxy
+            concrete_artifacts_manager=None,  # type: ConcreteArtifactsManager
+            preferred_candidates=None,  # type: Iterable[Candidate]
+            with_deps=True,  # type: bool
+            with_pre_releases=False,  # type: bool
+    ):  # type: (...) -> None
+        r"""Initialize helper attributes.
+
+        :param apis: An instance of the multiple Galaxy APIs wrapper.
+
+        :param concrete_artifacts_manager: An instance of the caching \
+                                           concrete artifacts manager.
+
+        :param with_deps: A flag specifying whether the resolver \
+                          should attempt to pull in the deps of the \
+                          requested requirements. On by default.
+
+        :param with_pre_releases: A flag specifying whether the \
+                                  resolver should include pre-release \
+                                  versions as matches. Off by default.
+        """
+        self._api_proxy = apis
+        self._make_req_from_dict = functools.partial(
+            Requirement.from_requirement_dict,
+            art_mgr=concrete_artifacts_manager,
+        )
+        self._preferred_candidates = set(preferred_candidates or ())
+        self._with_deps = with_deps
+        self._with_pre_releases = with_pre_releases
+
+    def identify(self, requirement_or_candidate):
+        # type: (Union[Candidate, Requirement]) -> str
+        """Given a requirement or candidate, return an identifier for it.
+
+        This is used to identify a requirement or candidate, e.g.
+        whether two requirements should have their specifier parts
+        (version ranges or pins) merged, or whether two candidates would
+        conflict with each other (because they have the same name but
+        different versions).
+        """
+        return requirement_or_candidate.canonical_package_id
+
+    def get_preference(
+            self,  # type: CollectionDependencyProvider
+            resolution,  # type: Optional[Candidate]
+            candidates,  # type: List[Candidate]
+            information,  # type: List[NamedTuple]
+    ):  # type: (...) -> Union[float, int]
+        """Return the sort key value for the given requirement.
+
+        This result should be based on preference that is defined as
+        "I think this requirement should be resolved first".
+        The lower the return value is, the more preferred this
+        group of arguments is.
+
+        :param resolution: Currently pinned candidate, or ``None``.
+
+        :param candidates: A list of possible candidates.
+
+        :param information: A list of requirement information.
+
+        Each ``information`` instance is a named tuple with two entries:
+
+          * ``requirement`` specifies a requirement contributing to
+            the current candidate list
+
+          * ``parent`` specifies the candidate that depends on (and so
+            contributed) the requirement, or ``None``
+            to indicate a root requirement.
+
+        The preference could depend on various factors, including
+        (not necessarily in this order):
+
+          * Is this package pinned in the current resolution result?
+
+          * How relaxed is the requirement? Stricter ones should
+            probably be worked on first? (I don't know, actually.)
+
+          * How many possibilities are there to satisfy this
+            requirement? Those with few left should likely be worked on
+            first, I guess?
+
+          * Are there any known conflicts for this requirement?
+            We should probably work on those with the most
+            known conflicts.
+
+        A sortable value should be returned (this will be used as the
+        `key` parameter of the built-in sorting function). The smaller
+        the value is, the more preferred this requirement is (i.e. the
+        sorting function is called with ``reverse=False``).
+        """
+        if any(
+                candidate in self._preferred_candidates
+                for candidate in candidates
+        ):
+            # NOTE: Prefer pre-installed candidates over newer versions
+            # NOTE: available from Galaxy or other sources.
+            return float('-inf')
+        return len(candidates)
+
+    def find_matches(self, requirements):
+        # type: (List[Requirement]) -> List[Candidate]
+        r"""Find all possible candidates satisfying given requirements.
+
+        This tries to get candidates based on the requirements' types.
+
+        For concrete requirements (SCM, dir, namespace dir, local or
+        remote archives), the one-and-only match is returned.
+
+        For a "named" requirement, Galaxy-compatible APIs are consulted
+        to find concrete candidates for this requirement. If there's a
+        pre-installed candidate, it's prepended in front of the others.
+
+        :param requirements: A collection of requirements which all of \
+                             the returned candidates must match. \
+                             All requirements are guaranteed to have \
+                             the same identifier. \
+                             The collection is never empty.
+
+        :returns: An iterable that orders candidates by preference, \
+                  e.g. the most preferred candidate comes first.
+        """
+        # FIXME: The first requirement may be a Git repo followed by
+        # FIXME: its cloned tmp dir. Using only the first one creates
+        # FIXME: loops that prevent any further dependency exploration.
+        # FIXME: We need to figure out how to prevent this.
+        first_req = requirements[0]
+        fqcn = first_req.fqcn
+        # The fqcn is guaranteed to be the same
+        coll_versions = self._api_proxy.get_collection_versions(first_req)
+        if first_req.is_concrete_artifact:
+            # FIXME: do we assume that all the following artifacts are also concrete?
+            # FIXME: does using fqcn==None cause us problems here?
+            return [
+                Candidate(fqcn, version, _none_src_server, first_req.type)
+                for version, _none_src_server in coll_versions
+            ]
+
+        preinstalled_candidates = {
+            candidate for candidate in self._preferred_candidates
+            if candidate.fqcn == fqcn
+        }
+
+        return list(preinstalled_candidates) + sorted(
+            {
+                candidate for candidate in (
+                    Candidate(fqcn, version, src_server, 'galaxy')
+                    for version, src_server in coll_versions
+                )
+                if all(self.is_satisfied_by(requirement, candidate) for requirement in requirements)
+                # FIXME
+                # if all(self.is_satisfied_by(requirement, candidate) and (
+                #     requirement.src is None or  # if this is true for some candidates but not all it will break key param - Nonetype can't be compared to str
+                #     requirement.src == candidate.src
+                # ))
+            },
+            key=lambda candidate: (
+                SemanticVersion(candidate.ver), candidate.src,
+            ),
+            reverse=True,  # prefer newer versions over older ones
+        )
+
+    def is_satisfied_by(self, requirement, candidate):
+        # type: (Requirement, Candidate) -> bool
+        r"""Whether the given requirement is satisfiable by a candidate.
+
+        :param requirement: A requirement that produced the `candidate`.
+
+        :param candidate: A pinned candidate supposedly matching the \
+                          `requirement` specifier. It is guaranteed to \
+                          have been generated from the `requirement`.
+
+        :returns: Indication whether the `candidate` is a viable \
+                  solution to the `requirement`.
+        """
+        # NOTE: Only allow pre-release candidates if we want pre-releases
+        # NOTE: or the requirement version was an exact match with the
+        # NOTE: pre-release version.
+        allow_pre_release = self._with_pre_releases or not (
+            requirement.ver == '*' or
+            requirement.ver.startswith('<') or
+            requirement.ver.startswith('>') or
+            requirement.ver.startswith('!=')
+        )
+        if is_pre_release(candidate.ver) and not allow_pre_release:
+            return False
+
+        # NOTE: This is a set of Pipenv-inspired optimizations. Ref:
+        # https://github.com/sarugaku/passa/blob/2ac00f1/src/passa/models/providers.py#L58-L74
+        if (
+                requirement.is_virtual or
+                candidate.is_virtual or
+                requirement.ver == '*'
+        ):
+            return True
+
+        return meets_requirements(
+            version=candidate.ver,
+            requirements=requirement.ver,
+        )
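
The pre-release gate above is worth restating on its own: pre-release candidates are admitted only when pre-releases were requested globally, or when the requirement pins a version outright (anything that is not ``*`` and not a range operator). A standalone sketch of the same rule; ``admits_candidate`` is an illustrative name, not part of the patch:

    from ansible.utils.version import SemanticVersion

    def admits_candidate(candidate_ver, requirement_ver, with_pre_releases=False):
        # An exact pin such as '2.0.0-rc1' implies consent to that
        # pre-release; '*' and range specifiers ('<', '>', '>=', '<=',
        # '!=') do not.
        allow_pre_release = with_pre_releases or not (
            requirement_ver == '*'
            or requirement_ver.startswith(('<', '>', '!='))
        )
        return allow_pre_release or not SemanticVersion(candidate_ver).is_prerelease

    # admits_candidate('2.0.0-rc1', '2.0.0-rc1') -> True  (exact pin)
    # admits_candidate('2.0.0-rc1', '>=1.0.0')   -> False (range, no opt-in)
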
+
+    def get_dependencies(self, candidate):
+        # type: (Candidate) -> List[Requirement]
+        r"""Get direct dependencies of a candidate.
+
+        :returns: A collection of requirements that `candidate` \
+                  specifies as its dependencies.
+        """
+        # FIXME: If there's several galaxy servers set, there may be a
+        # FIXME: situation when the metadata of the same collection
+        # FIXME: differs. So how do we resolve this case? Priority?
+        # FIXME: Taking into account a pinned hash? Exploding on
+        # FIXME: any differences?
+        # NOTE: The underlying implementation currently uses the first
+        # NOTE: found metadata.
+        req_map = self._api_proxy.get_collection_dependencies(candidate)
+
+        # NOTE: This guard expression MUST perform an early exit only
+        # NOTE: after the `get_collection_dependencies()` call because
+        # NOTE: internally it populates the artifact URL of the candidate,
+        # NOTE: its SHA hash and the Galaxy API token. These are still
+        # NOTE: necessary with `--no-deps` because even with the disabled
+        # NOTE: dependency resolution the outer layer will still need to
+        # NOTE: know how to download and validate the artifact.
+        #
+        # NOTE: Virtual candidates should always return dependencies
+        # NOTE: because they are ephemeral and non-installable.
+        if not self._with_deps and not candidate.is_virtual:
+            return []
+
+        return [
+            self._make_req_from_dict({'name': dep_name, 'version': dep_req})
+            for dep_name, dep_req in req_map.items()
+        ]
diff --git a/lib/ansible/galaxy/dependency_resolution/reporters.py b/lib/ansible/galaxy/dependency_resolution/reporters.py
new file mode 100644
index 0000000000..d8eacb70df
--- /dev/null
+++ b/lib/ansible/galaxy/dependency_resolution/reporters.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+# Copyright: (c) 2020-2021, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+"""Requirement reporter implementations."""
+
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+from resolvelib import BaseReporter
+
+
+class CollectionDependencyReporter(BaseReporter):
+    """A dependency reporter for Ansible Collections.
+
+    This is a proxy class allowing us to abstract away importing resolvelib
+    outside of the `ansible.galaxy.dependency_resolution` Python package.
+    """
diff --git a/lib/ansible/galaxy/dependency_resolution/resolvers.py b/lib/ansible/galaxy/dependency_resolution/resolvers.py
new file mode 100644
index 0000000000..1b3e30ff86
--- /dev/null
+++ b/lib/ansible/galaxy/dependency_resolution/resolvers.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+# Copyright: (c) 2020-2021, Ansible Project
+# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
+"""Requirement resolver implementations."""
+
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+from resolvelib import Resolver
+
+
+class CollectionDependencyResolver(Resolver):
+    """A dependency resolver for Ansible Collections.
+
+    This is a proxy class allowing us to abstract away importing resolvelib
+    outside of the `ansible.galaxy.dependency_resolution` Python package.
+ """ diff --git a/lib/ansible/galaxy/dependency_resolution/versioning.py b/lib/ansible/galaxy/dependency_resolution/versioning.py index c5c5ae85ab..c57f0d21e9 100644 --- a/lib/ansible/galaxy/dependency_resolution/versioning.py +++ b/lib/ansible/galaxy/dependency_resolution/versioning.py @@ -15,7 +15,10 @@ from ansible.utils.version import SemanticVersion def is_pre_release(version): # type: (str) -> bool """Figure out if a given version is a pre-release.""" - return SemanticVersion(version).is_prerelease + try: + return SemanticVersion(version).is_prerelease + except ValueError: + return False def meets_requirements(version, requirements): |