"""Test selection utility.

Defines filtering rules for what tests to include in a suite depending
on whether they apply to C++ unit tests, dbtests, or JS tests.
"""

import collections
import errno
import fnmatch
import math
import os.path
import random
import subprocess
import sys

import buildscripts.ciconfig.tags as _tags
from . import config
from . import errors
from . import utils
from .utils import globstar
from .utils import jscomment

########################
#  Test file explorer  #
########################


class TestFileExplorer(object):
    """Gateway for the file system and process operations used by test selection.

    Keeping this code in a single class makes it possible to substitute another explorer
    when testing the selection logic.
    """

    @staticmethod
    def is_glob_pattern(path):
        """Return whether the provided path is a glob pattern.

        Delegates to buildscripts.resmokelib.utils.globstar.is_glob_pattern().
        """
        looks_like_glob = globstar.is_glob_pattern(path)
        return looks_like_glob

    @staticmethod
    def iglob(pattern):  # noqa: D406,D407,D411,D413
        """Expand the given glob pattern.

        Delegates to buildscripts.resmokelib.utils.globstar.iglob().

        Args:
            pattern: the glob pattern to expand.
        Returns:
            Whatever globstar.iglob() returns; presumably an iterable of matching paths (str).
        """
        matches = globstar.iglob(pattern)
        return matches

    @staticmethod
    def jstest_tags(file_path):  # noqa: D406,D407,D411,D413
        """Return the tags declared in a JavaScript test file.

        Delegates to buildscripts.resmokelib.utils.jscomment.get_tags().

        Args:
            file_path: the path of the JavaScript test file.
        Returns:
            The tags as returned by jscomment.get_tags().
        """
        tags = jscomment.get_tags(file_path)
        return tags

    @staticmethod
    def read_root_file(root_file_path):  # noqa: D406,D407,D411,D413
        """Load the list of root tests from a file.

        Args:
            root_file_path: the path to a file containing the path of each test on a separate line.
        Returns:
            A list(str) with one entry per line of the file, stripped of surrounding whitespace.
            Blank lines are kept as empty strings.
        """
        with open(root_file_path, "r") as filep:
            return [line.strip() for line in filep]

    @staticmethod
    def fnmatchcase(name, pattern):
        """Return whether 'name' matches 'pattern', comparing case-sensitively.

        Delegates to the standard library's fnmatch.fnmatchcase().
        """
        is_match = fnmatch.fnmatchcase(name, pattern)
        return is_match

    @staticmethod
    def isfile(path):
        """Return whether 'path' refers to an existing regular file."""
        exists = os.path.isfile(path)
        return exists

    def list_dbtests(self, dbtest_binary):  # noqa: D406,D407,D411,D413
        """Return the dbtest suites reported by running 'dbtest_binary --list'.

        Args:
            dbtest_binary: the dbtest executable to run.
        Returns:
            A list(str) with one entry per line of the program output.
        Raises:
            errors.ResmokeError: if the program exits with a non-zero return code.
        """
        returncode, stdout = self._run_program(dbtest_binary, ["--list"])
        if returncode == 0:
            return stdout.splitlines()
        raise errors.ResmokeError("Getting list of dbtest suites failed")

    @staticmethod
    def _run_program(binary, args):  # noqa: D406,D407,D411,D413
        """Run a program to completion and capture its standard output.

        Args:
            binary: the binary to run.
            args: a list of arguments for the binary.
        Returns:
            A tuple (return code, standard output decoded as UTF-8).
        """
        program = subprocess.Popen([binary] + list(args), stdout=subprocess.PIPE)
        # Only stdout is piped; stderr is inherited from the current process.
        stdout, _ = program.communicate()
        return program.returncode, stdout.decode("utf-8")

    @staticmethod
    def parse_tag_file(test_kind):
        """Build the mapping of test files to tags declared in the tag file.

        The resulting dict will have as a key the filename and the
        value a list of tags, i.e., {'file1.js': ['tag1', 'tag2'], 'file2.js': ['tag2', 'tag3']}.
        The dict is empty when config.TAG_FILE is not set.
        """
        tagged_tests = collections.defaultdict(list)
        if not config.TAG_FILE:
            return tagged_tests

        tags_conf = _tags.TagsConfig.from_file(config.TAG_FILE)
        for pattern in tags_conf.get_test_patterns(test_kind):
            # A single pattern may expand to several test files.
            matching_tests = globstar.iglob(pattern)
            pattern_tags = tags_conf.get_tags(test_kind, pattern)
            for test in matching_tests:
                # The same test can be matched by several patterns, so its tags accumulate.
                tagged_tests[test].extend(pattern_tags)
        return tagged_tests


class _TestList(object):
    """
    A list of tests on which filtering operations can be applied.

    Args:
        test_file_explorer: a TestFileExplorer instance.
        roots: a list of tests to initialize the _TestList with.
        tests_are_files: indicates if the tests are file paths. If so the _TestList will perform
            glob expansion of paths and check if they are existing files. If not, calling
            'include_files()' or 'exclude_files()' will raise an TypeError.
    """

    def __init__(self, test_file_explorer, roots, tests_are_files=True):
        """Initialize the _TestList with a TestFileExplorer component and a list of root tests."""
        self._test_file_explorer = test_file_explorer
        self._tests_are_files = tests_are_files
        # The roots keep their original order (and possible duplicates); the filtered set
        # tracks which of them are currently selected.
        self._roots = self._expand_files(roots) if tests_are_files else roots
        self._filtered = set(self._roots)

    def _expand_files(self, tests):  # noqa: D406,D407,D411,D413
        """Expand glob patterns and validate literal paths.

        Args:
            tests: a list of paths or glob patterns.
        Returns:
            A list of paths where each glob pattern is replaced by its expansion and each
            literal path is normalized.
        Raises:
            ValueError: if a literal path does not correspond to an existing file.
        """
        expanded_tests = []
        for test in tests:
            if self._test_file_explorer.is_glob_pattern(test):
                expanded_tests.extend(self._test_file_explorer.iglob(test))
            else:
                if not self._test_file_explorer.isfile(test):
                    raise ValueError("Unrecognized test file: {}".format(test))
                expanded_tests.append(os.path.normpath(test))
        return expanded_tests

    def include_files(self, include_files, force=False):  # noqa: D406,D407,D411,D413
        """Filter the test list so that it only includes files matching 'include_files'.

        Args:
            include_files: a list of paths or glob patterns that match the files to include.
            force: if True include the matching files that were previously excluded, otherwise only
                   include files that match and were not previously excluded from this _TestList.
        Raises:
            TypeError: if this _TestList does not contain files.
        """
        if not self._tests_are_files:
            raise TypeError("_TestList does not contain files.")
        expanded_include_files = set()
        for path in include_files:
            if self._test_file_explorer.is_glob_pattern(path):
                expanded_include_files.update(self._test_file_explorer.iglob(path))
            else:
                expanded_include_files.add(os.path.normpath(path))
        self._filtered = self._filtered & expanded_include_files
        if force:
            # Bring back matching roots even if an earlier filter removed them.
            self._filtered |= set(self._roots) & expanded_include_files

    def exclude_files(self, exclude_files):  # noqa: D406,D407,D411,D413
        """Exclude from the test list the files that match elements from 'exclude_files'.

        Args:
            exclude_files: a list of paths or glob patterns that match the files to exclude.
        Raises:
            TypeError: if this _TestList does not contain files.
            ValueError: if exclude_files contains a non-globbed path that is not one of the
                root tests of this _TestList.
        """
        if not self._tests_are_files:
            raise TypeError("_TestList does not contain files.")
        # Build the lookup set once instead of scanning the roots list for every excluded path.
        known_roots = set(self._roots)
        for path in exclude_files:
            if self._test_file_explorer.is_glob_pattern(path):
                self._filtered.difference_update(self._test_file_explorer.iglob(path))
            else:
                path = os.path.normpath(path)
                if path not in known_roots:
                    raise ValueError(
                        ("Excluded test file {} does not exist, perhaps it was renamed or removed"
                         " , and should be modified in, or removed from, the exclude_files list.".
                         format(path)))
                self._filtered.discard(path)

    def match_tag_expression(self, tag_expression, get_tags):
        """Filter the test list to only include tests that match the tag expression.

        Args:
            tag_expression: a callable object that takes a list of tags and indicate if the required
                condition is met by returning a boolean.
            get_tags: a callable object that takes a test and returns the corresponding list of
                tags.
        """
        self._filtered = {test for test in self._filtered if tag_expression(get_tags(test))}

    def include_any_pattern(self, patterns):
        """Filter the test list to only include tests that match any provided glob patterns."""

        def match(test):
            """Return True if 'test' equals or glob-matches one of the patterns."""
            return any(test == pattern or fnmatch.fnmatchcase(test, pattern)
                       for pattern in patterns)

        self._filtered = {test for test in self._filtered if match(test)}

    def get_tests(self):  # noqa: D406,D407,D411,D413
        """Return the selected and the excluded tests.

        Returns:
            A tuple (selected, excluded) of two list(str). Both lists follow the order in which
            the tests are found in the root tests, and each test appears only once.
        """
        selected = []
        excluded = []
        # A root can be listed several times (e.g. literally and through a glob); the set
        # keeps the de-duplication linear instead of re-scanning the output lists.
        seen = set()
        for test in self._roots:
            if test in seen:
                continue
            seen.add(test)
            if test in self._filtered:
                selected.append(test)
            else:
                excluded.append(test)
        return selected, excluded


##############################
#  Tag matching expressions  #
##############################


class _AllOfExpression(object):
    """A tag matching expression that matches only when every child expression matches."""

    def __init__(self, children):
        """Store the child expressions; each is a callable taking the list of file tags."""
        self.__children = children

    def __call__(self, file_tags):
        """Return True if all the children match 'file_tags' (True when there are no children)."""
        for child in self.__children:
            if not child(file_tags):
                # Stop at the first child that does not match.
                return False
        return True


class _AnyOfExpression(object):
    """A tag matching expression that matches when at least one child expression matches."""

    def __init__(self, children):
        """Store the child expressions; each is a callable taking the list of file tags."""
        self.__children = children

    def __call__(self, file_tags):
        """Return True if any child matches 'file_tags' (False when there are no children)."""
        for child in self.__children:
            if child(file_tags):
                # Stop at the first child that matches.
                return True
        return False


class _NotExpression(object):
    """A tag matching expression that negates the result of its child expression."""

    def __init__(self, child):
        """Store the child expression, a callable taking the list of file tags."""
        self.__child = child

    def __call__(self, file_tags):
        """Return True exactly when the child expression does not match 'file_tags'."""
        child_matches = self.__child(file_tags)
        return not child_matches


class _MatchExpression(object):
    """A tag matching expression that matches when one specific tag is among the file tags."""

    def __init__(self, tag):
        """Store the tag to look for."""
        self.__tag = tag

    def __call__(self, file_tags):
        """Return the result of the membership test of the tag in 'file_tags'."""
        tag_present = self.__tag in file_tags
        return tag_present


def make_expression(conf):
    """Build a tag matching expression from its configuration.

    The syntax for the expression configuration is:
    - expr: str_expr | dict_expr
    - str_expr: "<tagname>"
    - dict_expr: allof_expr | anyof_expr | not_expr
    - allof_expr: {"$allOf": [expr, ...]}
    - anyof_expr: {"$anyOf": [expr, ...]}
    - not_expr: {"$not": expr}

    A ValueError is raised if the configuration is neither a str nor a dict, if a dict does
    not contain exactly one key, or if that key is not one of the operators above.
    """
    if isinstance(conf, str):
        return _MatchExpression(conf)

    if isinstance(conf, dict):
        if len(conf) != 1:
            raise ValueError("Tag matching expressions should only contain one key")
        # The dict holds a single operator and its operand.
        operator, operand = next(iter(conf.items()))
        if operator == "$not":
            return _NotExpression(make_expression(operand))
        if operator == "$allOf":
            return _AllOfExpression(_make_expression_list(operand))
        if operator == "$anyOf":
            return _AnyOfExpression(_make_expression_list(operand))

    # Reached for unsupported types and for unknown operators.
    raise ValueError("Invalid tag matching expression: {}".format(conf))


def _make_expression_list(configs):
    """Return the list of expressions built by make_expression() for each item of 'configs'."""
    expressions = []
    for conf in configs:
        expressions.append(make_expression(conf))
    return expressions


####################
#  Test Selectors  #
####################


class _SelectorConfig(object):
    """Base class holding the configuration used to select tests."""

    def __init__(  # pylint: disable=too-many-arguments
            self, root=None, roots=None, include_files=None, exclude_files=None, include_tags=None,
            exclude_tags=None, include_with_any_tags=None, exclude_with_any_tags=None):
        """Build the _SelectorConfig from the configuration elements.

        Args:
            root: the path to a file containing the list of root tests. Incompatible with 'roots'.
            roots: a list of root tests. Incompatible with 'root'.
            include_files: a list of paths or glob patterns the tests must be included in.
            exclude_files: a list of paths or glob patterns the tests must not be included in.
            include_tags: a str or dict representing a tag matching expression that the tags of the
                selected tests must match. Incompatible with 'exclude_tags'.
            exclude_tags: a str or dict representing a tag matching expression that the tags of the
                selected tests must not match. Incompatible with 'include_tags'.
            include_with_any_tags: a list of tags. All selected tests must have at least one them.
                Merged with config.INCLUDE_WITH_ANY_TAGS.
            exclude_with_any_tags: a list of tags. No selected tests can have any of them.
                Merged with config.EXCLUDE_WITH_ANY_TAGS.
        """
        # Reject combinations of arguments that are mutually exclusive.
        if root and roots:
            raise ValueError("root and roots cannot be specified at the same time")
        if include_tags and exclude_tags:
            raise ValueError("include_tags and exclude_tags cannot be specified at the same time")

        self.root = root
        self.roots = roots
        self.include_files = utils.default_if_none(include_files, [])
        self.exclude_files = utils.default_if_none(exclude_files, [])

        # The tags given in the configuration are combined with the globally configured ones.
        self.tags_expression = self.__make_tags_expression(
            include_tags, exclude_tags,
            self.__merge_lists(include_with_any_tags, config.INCLUDE_WITH_ANY_TAGS),
            self.__merge_lists(exclude_with_any_tags, config.EXCLUDE_WITH_ANY_TAGS))

    @staticmethod
    def __merge_lists(list_a, list_b):
        """Return the union of both lists as a set, or None if neither list has elements."""
        if not (list_a or list_b):
            return None
        merged = set()
        for tags in (list_a, list_b):
            if tags is not None:
                merged.update(tags)
        return merged

    @staticmethod
    def __make_tags_expression(include_tags, exclude_tags, include_with_any_tags,
                               exclude_with_any_tags):
        """Combine the tag filters into one expression, or return None if there are none."""
        expressions = []

        # 'include_tags' takes precedence; the constructor rejects specifying both.
        if include_tags:
            expressions.append(make_expression(include_tags))
        elif exclude_tags:
            expressions.append(_NotExpression(make_expression(exclude_tags)))

        if include_with_any_tags:
            expressions.append(make_expression({"$anyOf": include_with_any_tags}))

        if exclude_with_any_tags:
            expressions.append(make_expression({"$not": {"$anyOf": exclude_with_any_tags}}))

        if not expressions:
            return None
        return _AllOfExpression(expressions)


class _Selector(object):
    """Selection algorithm that picks the tests matching a selector configuration."""

    def __init__(self, test_file_explorer, tests_are_files=True):
        """Set up the _Selector.

        Args:
            test_file_explorer: a TestFileExplorer instance.
            tests_are_files: whether the tests are file paths; the file based filters are only
                applied when this is True.
        """
        self._tests_are_files = tests_are_files
        self._test_file_explorer = test_file_explorer

    def select(self, selector_config):  # noqa: D406,D407,D411,D413
        """Return the tests that match the given configuration.

        Args:
            selector_config: a _SelectorConfig instance.
        Returns:
            A tuple with the list of selected tests and the list of excluded tests.
        """
        # 1. Determine the root tests, reading the root file when no list was given.
        roots = selector_config.roots
        if roots is None:
            roots = self._test_file_explorer.read_root_file(selector_config.root)

        # 2. Wrap them in a _TestList on which the filters are applied.
        test_list = _TestList(self._test_file_explorer, roots, self._tests_are_files)
        filter_by_file = self._tests_are_files

        # 3. Drop the excluded files.
        if filter_by_file and selector_config.exclude_files:
            test_list.exclude_files(selector_config.exclude_files)

        # 4. Keep only the tests whose tags satisfy the tag expression.
        tags_expression = selector_config.tags_expression
        if tags_expression:
            test_list.match_tag_expression(tags_expression, self.get_tags)

        # 5. The include files come last, with force=True, so they win over the tag filters.
        if filter_by_file and selector_config.include_files:
            test_list.include_files(selector_config.include_files, force=True)

        selected, excluded = test_list.get_tests()
        return self.sort_tests(selected, excluded)

    @staticmethod
    def sort_tests(tests, excluded):
        """Return both lists, sorted case-insensitively if config.ORDER_TESTS_BY_NAME is set."""
        if not config.ORDER_TESTS_BY_NAME:
            return tests, excluded
        return sorted(tests, key=str.lower), sorted(excluded, key=str.lower)

    @staticmethod
    def get_tags(test_file):  # pylint: disable=unused-argument
        """Retrieve the tags associated with the given test file; none in this base class."""
        return []


class _JSTestSelectorConfig(_SelectorConfig):
    """_SelectorConfig subclass used for JavaScript tests."""

    def __init__(  # pylint: disable=too-many-arguments
            self, roots=None, include_files=None, exclude_files=None, include_with_any_tags=None,
            exclude_with_any_tags=None, include_tags=None, exclude_tags=None):
        """Forward every option to _SelectorConfig; a 'root' file is not supported here."""
        super(_JSTestSelectorConfig, self).__init__(
            roots=roots,
            include_files=include_files,
            exclude_files=exclude_files,
            include_with_any_tags=include_with_any_tags,
            exclude_with_any_tags=exclude_with_any_tags,
            include_tags=include_tags,
            exclude_tags=exclude_tags)


class _JSTestSelector(_Selector):
    """_Selector subclass used for JavaScript tests."""

    def __init__(self, test_file_explorer):
        """Set up the selector and load the "js_test" entries of the tag file."""
        super(_JSTestSelector, self).__init__(test_file_explorer)
        self._tags = self._test_file_explorer.parse_tag_file("js_test")

    def get_tags(self, test_file):
        """Return the tags of test_file, merged with its tags from the tag file, if any."""
        file_tags = self._test_file_explorer.jstest_tags(test_file)
        if test_file not in self._tags:
            return file_tags
        declared_in_file = set(file_tags)
        declared_in_tag_file = set(self._tags[test_file])
        return list(declared_in_file | declared_in_tag_file)


class _MultiJSTestSelectorConfig(_JSTestSelectorConfig):
    """_SelectorConfig subclass used to select groups of JavaScript tests."""

    def __init__(self, group_size=None, group_count_multiplier=1, **kwargs):
        """Build the configuration.

        :param group_size: number of tests in each group.
        :param group_count_multiplier: number of times to schedule each workload, can be a decimal.
               E.g. 2.5 means half of the workloads get scheduled twice, and half get scheduled
               3 times.
        :param kwargs: arguments forwarded to the superclass.
        """
        super(_MultiJSTestSelectorConfig, self).__init__(**kwargs)
        self.group_count_multiplier = group_count_multiplier
        self.group_size = group_size


class _MultiJSTestSelector(_JSTestSelector):
    """_Selector subclass for selecting one group of JavaScript tests at a time.

    Each group can include one or more tests.

    E.g. [[test1.js, test2.js], [test3.js, test4.js]].
    """

    def select(self, selector_config):  # noqa: D406,D407,D411,D413
        """Select the tests and arrange them into randomly composed groups.

        1. Select the tests as _JSTestSelector does and shuffle a copy of them (the corpus).
        2. Slice the corpus into consecutive groups of "group_size" tests until at least
           len(tests) * group_count_multiplier / group_size groups have been created. When
           fewer than "group_size" tests are left, the already used tests are reshuffled and
           appended after the remaining ones so that slicing can continue.

        Args:
            selector_config: a _MultiJSTestSelectorConfig instance.
        Returns:
            A tuple with the list of test groups (each a list of tests) and the list of
            excluded tests. The list of groups is empty when no tests are selected.
        """
        tests, excluded = _JSTestSelector.select(self, selector_config)

        # Without tests there is nothing to group. Returning here also avoids dividing by
        # zero below when group_size falls back to len(tests).
        if not tests:
            return [], excluded

        group_size = selector_config.group_size
        multi = selector_config.group_count_multiplier

        # We use the group size as a sentinel to determine if the tests are coming from
        # the command line, in which case group_size would be undefined. For command line
        # tests, we assume the user is trying to repro a certain issue, so we group all
        # of the tests together.
        if group_size is None:
            multi = 1
            group_size = len(tests)

        grouped_tests = []

        start = 0
        corpus = tests[:]
        random.shuffle(corpus)

        # May be fractional; the loop below creates groups until the count reaches it.
        num_groups = len(tests) * multi / group_size
        while len(grouped_tests) < num_groups:
            if start + group_size > len(corpus):
                # Not enough unused tests left for a full group: recycle the used ones.
                recycled_tests = corpus[:start]
                random.shuffle(recycled_tests)
                corpus = corpus[start:] + recycled_tests
                start = 0
            grouped_tests.append(corpus[start:start + group_size])
            start += group_size
        return grouped_tests, excluded

    @staticmethod
    def sort_tests(tests, excluded):
        """There is no need to sort FSM test groups."""
        return tests, excluded


class _CppTestSelectorConfig(_SelectorConfig):
    """_SelectorConfig subclass used for cpp_integration_test and cpp_unit_test tests."""

    def __init__(self, root=config.DEFAULT_INTEGRATION_TEST_LIST, roots=None, include_files=None,
                 exclude_files=None):
        """Build the configuration from either the 'roots' list or the 'root' file."""
        # The 'roots' argument is only present when tests are specified on the command line
        # and in that case they take precedence over the tests in the root file.
        root_source = {"roots": roots} if roots else {"root": root}
        super(_CppTestSelectorConfig, self).__init__(include_files=include_files,
                                                     exclude_files=exclude_files, **root_source)


class _CppTestSelector(_Selector):
    """_Selector subclass used for cpp_integration_test and cpp_unit_test tests."""

    def __init__(self, test_file_explorer):
        """Set up the _CppTestSelector with a TestFileExplorer."""
        super(_CppTestSelector, self).__init__(test_file_explorer)

    def select(self, selector_config):
        """Return a tuple with the selected tests and the excluded tests."""
        command_line_roots = selector_config.roots
        if not command_line_roots:
            # No explicit tests: apply the regular selection algorithm.
            return _Selector.select(self, selector_config)

        # Tests have been specified on the command line. We use them without additional
        # filtering.
        return _TestList(self._test_file_explorer, command_line_roots).get_tests()


class _DbTestSelectorConfig(_SelectorConfig):
    """_SelectorConfig subclass used for db_test tests."""

    def __init__(self, binary=None, roots=None, include_suites=None):
        """Build the configuration and resolve the dbtest binary to use."""
        super(_DbTestSelectorConfig, self).__init__(roots=roots)
        self.include_suites = utils.default_if_none(include_suites, [])

        # Precedence: command line option, then the YAML configuration, then the default.
        binary = utils.default_if_none(
            utils.default_if_none(config.DBTEST_EXECUTABLE, binary),
            config.DEFAULT_DBTEST_EXECUTABLE)

        # Ensure that executable files on Windows have a ".exe" extension.
        needs_exe_suffix = sys.platform == "win32" and os.path.splitext(binary)[1] != ".exe"
        self.binary = binary + ".exe" if needs_exe_suffix else binary


class _DbTestSelector(_Selector):
    """_Selector subclass used for db_test tests."""

    def __init__(self, test_file_explorer):
        """Set up the _DbTestSelector; dbtest suites are not files."""
        super(_DbTestSelector, self).__init__(test_file_explorer, tests_are_files=False)

    def select(self, selector_config):
        """Return a tuple with the selected suites and the excluded suites."""
        explorer = self._test_file_explorer
        command_line_roots = selector_config.roots

        if command_line_roots:
            roots = command_line_roots
        else:
            # No explicit suites: ask the dbtest binary for the list of available suites.
            binary = selector_config.binary
            if not explorer.isfile(binary):
                raise IOError(errno.ENOENT, "File not found", binary)
            roots = explorer.list_dbtests(binary)

        if config.INCLUDE_WITH_ANY_TAGS:
            # The db_tests do not currently support tags so we always return an empty array when the
            # --includeWithAnyTags option is used.
            return [], roots

        if command_line_roots:
            # Tests have been specified on the command line. We use them without additional
            # filtering.
            return command_line_roots, []

        include_suites = selector_config.include_suites
        if not include_suites:
            return roots, []

        suites = _TestList(explorer, roots, tests_are_files=False)
        suites.include_any_pattern(include_suites)
        return suites.get_tests()


class _FileBasedSelectorConfig(_SelectorConfig):
    """_SelectorConfig subclass used for json_schema_test and mql_model_mongod_test tests."""

    def __init__(self, roots, include_files=None, exclude_files=None):
        """Build the configuration from the root tests and the optional file filters."""
        super(_FileBasedSelectorConfig, self).__init__(
            roots=roots,
            include_files=include_files,
            exclude_files=exclude_files)


class _SleepTestCaseSelectorConfig(_SelectorConfig):
    """_SelectorConfig subclass used for sleep_test tests."""

    def __init__(self, roots):
        """Build the configuration from the root tests only."""
        super(_SleepTestCaseSelectorConfig, self).__init__(roots=roots)


class _SleepTestCaseSelector(_Selector):
    """_Selector subclass used for sleep_test tests."""

    def __init__(self, test_file_explorer):
        """Set up the _SleepTestCaseSelector; its tests are not files."""
        super(_SleepTestCaseSelector, self).__init__(test_file_explorer, tests_are_files=False)


class _PyTestCaseSelectorConfig(_SelectorConfig):
    """_SelectorConfig subclass used for py_test tests."""

    def __init__(self, roots, include_files=None, exclude_files=None):
        """Build the configuration from the root tests and the optional file filters."""
        super(_PyTestCaseSelectorConfig, self).__init__(
            roots=roots,
            include_files=include_files,
            exclude_files=exclude_files)


class _GennylibTestCaseSelectorConfig(_SelectorConfig):
    """_SelectorConfig subclass used for gennylib_test tests."""

    def __init__(self):
        """Build the configuration with a single placeholder root."""
        placeholder_roots = ["dummy-gennylib-test-roots"]
        super(_GennylibTestCaseSelectorConfig, self).__init__(roots=placeholder_roots)


class _GennylibTestCaseSelector(_Selector):
    """_Selector subclass used for gennylib_test tests."""

    def __init__(self, test_file_explorer):
        """Set up the _GennylibTestCaseSelector; its tests are not files."""
        super(_GennylibTestCaseSelector, self).__init__(test_file_explorer,
                                                        tests_are_files=False)


##########################################
# Module entry point for filtering tests #
##########################################

# Explorer used by filter_tests() when the caller does not provide one.
_DEFAULT_TEST_FILE_EXPLORER = TestFileExplorer()

# Maps each supported test kind to a (selector config class, selector class) pair.
# filter_tests() instantiates the config class with the selector configuration dict and
# the selector class with a TestFileExplorer.
_SELECTOR_REGISTRY = {
    "cpp_integration_test": (_CppTestSelectorConfig, _CppTestSelector),
    "cpp_unit_test": (_CppTestSelectorConfig, _CppTestSelector),
    "benchmark_test": (_CppTestSelectorConfig, _CppTestSelector),
    "benchrun_embedded_test": (_FileBasedSelectorConfig, _Selector),
    "db_test": (_DbTestSelectorConfig, _DbTestSelector),
    "fsm_workload_test": (_JSTestSelectorConfig, _JSTestSelector),
    "parallel_fsm_workload_test": (_MultiJSTestSelectorConfig, _MultiJSTestSelector),
    "json_schema_test": (_FileBasedSelectorConfig, _Selector),
    "js_test": (_JSTestSelectorConfig, _JSTestSelector),
    "mql_model_haskell_test": (_FileBasedSelectorConfig, _Selector),
    "mql_model_mongod_test": (_FileBasedSelectorConfig, _Selector),
    "multi_stmt_txn_passthrough": (_JSTestSelectorConfig, _JSTestSelector),
    "py_test": (_PyTestCaseSelectorConfig, _Selector),
    "sleep_test": (_SleepTestCaseSelectorConfig, _SleepTestCaseSelector),
    "genny_test": (_FileBasedSelectorConfig, _Selector),
    "gennylib_test": (_GennylibTestCaseSelectorConfig, _GennylibTestCaseSelector),
}


def filter_tests(test_kind, selector_config, test_file_explorer=_DEFAULT_TEST_FILE_EXPLORER):
    """Select the tests of the given kind that match a selector configuration.

    Args:
        test_kind: the test kind, from _SELECTOR_REGISTRY.
        selector_config: a dict containing the selector configuration; its items are passed
            as keyword arguments to the selector config class registered for the test kind.
        test_file_explorer: the TestFileExplorer to use. Using a TestFileExplorer other than
            the default one should not be needed except for mocking purposes.

    Returns the result of the registered selector's select() method. Raises a ValueError if
    the test kind is not registered.
    """
    registered = _SELECTOR_REGISTRY.get(test_kind)
    if registered is None:
        raise ValueError("Unknown test kind '{}'".format(test_kind))

    config_class, selector_class = registered
    # The selector is created before the configuration, as both constructors may fail.
    selector = selector_class(test_file_explorer)
    parsed_config = config_class(**selector_config)
    return selector.select(parsed_config)