diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2017-03-28 11:08:01 -0400 |
---|---|---|
committer | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2017-03-28 11:08:01 -0400 |
commit | 00ee4f5156348477b9dd3f71b747104794f766c0 (patch) | |
tree | 80e1a2a5aa5297038e727004a9ed6f7ab10ed076 /buildscripts/linter | |
parent | 1114c0c3fdbc1e766b50bbd17a8cfc2606528335 (diff) | |
download | mongo-00ee4f5156348477b9dd3f71b747104794f766c0.tar.gz |
SERVER-28465: Refactor eslint.py and clang_format.py
Diffstat (limited to 'buildscripts/linter')
-rw-r--r-- | buildscripts/linter/__init__.py | 1 | ||||
-rw-r--r-- | buildscripts/linter/git.py | 277 | ||||
-rw-r--r-- | buildscripts/linter/parallel.py | 70 |
3 files changed, 348 insertions, 0 deletions
diff --git a/buildscripts/linter/__init__.py b/buildscripts/linter/__init__.py new file mode 100644 index 00000000000..4b7a2bb941b --- /dev/null +++ b/buildscripts/linter/__init__.py @@ -0,0 +1 @@ +"""Empty.""" diff --git a/buildscripts/linter/git.py b/buildscripts/linter/git.py new file mode 100644 index 00000000000..c8f95149750 --- /dev/null +++ b/buildscripts/linter/git.py @@ -0,0 +1,277 @@ +from __future__ import absolute_import +from __future__ import print_function + +import itertools +import os +import re +import subprocess + +from buildscripts import moduleconfig +from buildscripts.resmokelib.utils import globstar + +# Path to the modules in the mongodb source tree +# Has to match the string in SConstruct +MODULE_DIR = "src/mongo/db/modules" + +def get_base_dir(): + """Get the base directory for mongo repo. + This script assumes that it is running in buildscripts/, and uses + that to find the base directory. + """ + try: + return subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).rstrip() + except: + # We are not in a valid git directory. Use the script path instead. + return os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + +def get_repos(): + """Get a list of Repos to check clang-format for + """ + base_dir = get_base_dir() + + # Get a list of modules + # TODO: how do we filter rocks, does it matter? + mongo_modules = moduleconfig.discover_module_directories( + os.path.join(base_dir, MODULE_DIR), None) + + paths = [os.path.join(base_dir, MODULE_DIR, m) for m in mongo_modules] + + paths.append(base_dir) + + return [Repo(p) for p in paths] + +class Repo(object): + """Class encapsulates all knowledge about a git repository, and its metadata + to run clang-format. + """ + def __init__(self, path): + self.path = path + + def _callgito(self, args): + """Call git for this repository, and return the captured output + """ + # These two flags are the equivalent of -C in newer versions of Git + # but we use these to support versions pre 1.8.5 but it depends on the command + # and what the current directory is + if "ls-files" in args: + # This command depends on the current directory and works better if not run with + # work-tree + return subprocess.check_output(['git', '--git-dir', os.path.join(self.path, ".git")] + + args) + else: + return subprocess.check_output(['git', '--git-dir', os.path.join(self.path, ".git"), + '--work-tree', self.path] + args) + + def _callgit(self, args): + """Call git for this repository without capturing output + This is designed to be used when git returns non-zero exit codes. + """ + # These two flags are the equivalent of -C in newer versions of Git + # but we use these to support versions pre 1.8.5 but it depends on the command + # and what the current directory is + return subprocess.call(['git', '--git-dir', os.path.join(self.path, ".git"), + '--work-tree', self.path] + args) + + def _get_local_dir(self, path): + """Get a directory path relative to the git root directory + """ + if os.path.isabs(path): + path = os.path.relpath(path, self.path) + + # Normalize Windows style paths to Unix style which git uses on all platforms + path = path.replace("\\", "/") + + return path + + def get_candidates(self, candidates, filter_function): + """Get the set of candidate files to check by querying the repository + + Returns the full path to the file for clang-format to consume. + """ + if candidates is not None and len(candidates) > 0: + candidates = [self._get_local_dir(f) for f in candidates] + valid_files = list(set(candidates).intersection(self.get_candidate_files(filter_function))) + else: + valid_files = list(self.get_candidate_files(filter_function)) + + # Get the full file name here + valid_files = [os.path.normpath(os.path.join(self.path, f)) for f in valid_files] + + return valid_files + + def _git_ls_files(self, cmd, filter_function): + """Run git-ls-files and filter the list of files to a valid candidate list + """ + gito = self._callgito(cmd) + + # This allows us to pick all the interesting files + # in the mongo and mongo-enterprise repos + file_list = [line.rstrip() + for line in gito.splitlines() if filter_function(line.rstrip())] + + return file_list + + def get_candidate_files(self, filter_function): + """Query git to get a list of all files in the repo to consider for analysis + """ + return self._git_ls_files(["ls-files", "--cached"], filter_function) + + def get_working_tree_candidate_files(self, filter_function): + """Query git to get a list of all files in the working tree to consider for analysis + """ + return self._git_ls_files(["ls-files", "--cached", "--others"], filter_function) + + def get_working_tree_candidates(self, filter_function): + """Get the set of candidate files to check by querying the repository + + Returns the full path to the file for clang-format to consume. + """ + valid_files = list(self.get_working_tree_candidate_files(filter_function)) + + # Get the full file name here + valid_files = [os.path.normpath(os.path.join(self.path, f)) for f in valid_files] + + # Filter out files that git thinks exist but were removed. + valid_files = [f for f in valid_files if os.path.exists(f)] + + return valid_files + + def is_detached(self): + """Is the current working tree in a detached HEAD state? + """ + # symbolic-ref returns 1 if the repo is in a detached HEAD state + return self._callgit(["symbolic-ref", "--quiet", "HEAD"]) + + def is_ancestor(self, parent, child): + """Is the specified parent hash an ancestor of child hash? + """ + # merge base returns 0 if parent is an ancestor of child + return not self._callgit(["merge-base", "--is-ancestor", parent, child]) + + def is_commit(self, sha1): + """Is the specified hash a valid git commit? + """ + # cat-file -e returns 0 if it is a valid hash + return not self._callgit(["cat-file", "-e", "%s^{commit}" % sha1]) + + def is_working_tree_dirty(self): + """Does the current working tree have changes? + """ + # diff returns 1 if the working tree has local changes + return self._callgit(["diff", "--quiet"]) + + def does_branch_exist(self, branch): + """Does the branch exist? + """ + # rev-parse returns 0 if the branch exists + return not self._callgit(["rev-parse", "--verify", branch]) + + def get_merge_base(self, commit): + """Get the merge base between 'commit' and HEAD + """ + return self._callgito(["merge-base", "HEAD", commit]).rstrip() + + def get_branch_name(self): + """Get the current branch name, short form + This returns "master", not "refs/head/master" + Will not work if the current branch is detached + """ + branch = self.rev_parse(["--abbrev-ref", "HEAD"]) + if branch == "HEAD": + raise ValueError("Branch is currently detached") + + return branch + + def add(self, command): + """git add wrapper + """ + return self._callgito(["add"] + command) + + def checkout(self, command): + """git checkout wrapper + """ + return self._callgito(["checkout"] + command) + + def commit(self, command): + """git commit wrapper + """ + return self._callgito(["commit"] + command) + + def diff(self, command): + """git diff wrapper + """ + return self._callgito(["diff"] + command) + + def log(self, command): + """git log wrapper + """ + return self._callgito(["log"] + command) + + def rev_parse(self, command): + """git rev-parse wrapper + """ + return self._callgito(["rev-parse"] + command).rstrip() + + def rm(self, command): + """git rm wrapper + """ + return self._callgito(["rm"] + command) + + def show(self, command): + """git show wrapper + """ + return self._callgito(["show"] + command) + + +def expand_file_string(glob_pattern): + """Expand a string that represents a set of files + """ + return [os.path.abspath(f) for f in globstar.iglob(glob_pattern)] + +def get_files_to_check_working_tree(filter_function): + """Get a list of files to check from the working tree. + This will pick up files not managed by git. + """ + repos = get_repos() + + valid_files = list(itertools.chain.from_iterable([r.get_working_tree_candidates(filter_function) for r in repos])) + + return valid_files + +def get_files_to_check(files, filter_function): + """Get a list of files that need to be checked + based on which files are managed by git. + """ + # Get a list of candidate_files + candidates = [expand_file_string(f) for f in files] + candidates = list(itertools.chain.from_iterable(candidates)) + + if len(files) > 0 and len(candidates) == 0: + raise ValueError("Globs '%s' did not find any files." % (files)) + + repos = get_repos() + + valid_files = list(itertools.chain.from_iterable([r.get_candidates(candidates, filter_function) for r in repos])) + + return valid_files + +def get_files_to_check_from_patch(patches, filter_function): + """Take a patch file generated by git diff, and scan the patch for a list of files to check. + """ + candidates = [] + + # Get a list of candidate_files + check = re.compile(r"^diff --git a\/([\w\/\.\-]+) b\/[\w\/\.\-]+") + + lines = [] + for patch in patches: + with open(patch, "rb") as infile: + lines += infile.readlines() + + candidates = [check.match(line).group(1) for line in lines if check.match(line)] + + repos = get_repos() + + valid_files = list(itertools.chain.from_iterable([r.get_candidates(candidates, filter_function) for r in repos])) + + return valid_files diff --git a/buildscripts/linter/parallel.py b/buildscripts/linter/parallel.py new file mode 100644 index 00000000000..95fee2c7c3b --- /dev/null +++ b/buildscripts/linter/parallel.py @@ -0,0 +1,70 @@ +from __future__ import absolute_import +from __future__ import print_function + +import Queue +import threading +import time +from multiprocessing import cpu_count + +def parallel_process(items, func): + """Run a set of work items to completion + """ + try: + cpus = cpu_count() + except NotImplementedError: + cpus = 1 + + task_queue = Queue.Queue() + + # Use a list so that worker function will capture this variable + pp_event = threading.Event() + pp_result = [True] + pp_lock = threading.Lock() + + def worker(): + """Worker thread to process work items in parallel + """ + while not pp_event.is_set(): + try: + item = task_queue.get_nowait() + except Queue.Empty: + # if the queue is empty, exit the worker thread + pp_event.set() + return + + try: + ret = func(item) + finally: + # Tell the queue we finished with the item + task_queue.task_done() + + # Return early if we fail, and signal we are done + if not ret: + with pp_lock: + pp_result[0] = False + + pp_event.set() + return + + # Enqueue all the work we want to process + for item in items: + task_queue.put(item) + + # Process all the work + threads = [] + for cpu in range(cpus): + thread = threading.Thread(target=worker) + + thread.daemon = True + thread.start() + threads.append(thread) + + # Wait for the threads to finish + # Loop with a timeout so that we can process Ctrl-C interrupts + while not pp_event.wait(1): + time.sleep(1) + + for thread in threads: + thread.join() + + return pp_result[0] |