summary | refs | log | tree | commit | diff
path: root/buildscripts/linter
diff options
context:
space:
mode:
author: Mark Benvenuto <mark.benvenuto@mongodb.com> 2017-03-28 11:08:01 -0400
committer: Mark Benvenuto <mark.benvenuto@mongodb.com> 2017-03-28 11:08:01 -0400
commit: 00ee4f5156348477b9dd3f71b747104794f766c0 (patch)
tree: 80e1a2a5aa5297038e727004a9ed6f7ab10ed076 /buildscripts/linter
parent: 1114c0c3fdbc1e766b50bbd17a8cfc2606528335 (diff)
download: mongo-00ee4f5156348477b9dd3f71b747104794f766c0.tar.gz
SERVER-28465: Refactor eslint.py and clang_format.py
Diffstat (limited to 'buildscripts/linter')
-rw-r--r-- buildscripts/linter/__init__.py 1
-rw-r--r-- buildscripts/linter/git.py 277
-rw-r--r-- buildscripts/linter/parallel.py 70
3 files changed, 348 insertions, 0 deletions
diff --git a/buildscripts/linter/__init__.py b/buildscripts/linter/__init__.py
new file mode 100644
index 00000000000..4b7a2bb941b
--- /dev/null
+++ b/buildscripts/linter/__init__.py
@@ -0,0 +1 @@
+"""Empty."""
diff --git a/buildscripts/linter/git.py b/buildscripts/linter/git.py
new file mode 100644
index 00000000000..c8f95149750
--- /dev/null
+++ b/buildscripts/linter/git.py
@@ -0,0 +1,277 @@
+from __future__ import absolute_import
+from __future__ import print_function
+
+import itertools
+import os
+import re
+import subprocess
+
+from buildscripts import moduleconfig
+from buildscripts.resmokelib.utils import globstar
+
+# Path to the modules in the mongodb source tree
+# Has to match the string in SConstruct
+MODULE_DIR = "src/mongo/db/modules"
+
def get_base_dir():
    """Get the base directory for the mongo repo.

    Asks git for the top-level directory of the checkout that contains the
    current working directory.  If git cannot be executed, or exits with an
    error (for example because the current directory is not inside a git work
    tree), the location of this file is used instead: it lives in
    buildscripts/linter/, so the repository root is two directories above the
    directory that contains it.
    """
    try:
        return subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).rstrip()
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran but returned a non-zero exit code.
        # OSError: the git executable could not be started at all.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        script_dir = os.path.dirname(os.path.realpath(__file__))

        # buildscripts/linter -> buildscripts -> repository root
        return os.path.dirname(os.path.dirname(script_dir))
+
def get_repos():
    """Build the list of Repo objects to check.

    One Repo is created for every module directory reported by
    moduleconfig.discover_module_directories() under MODULE_DIR, followed by
    one Repo for the base directory itself.
    """
    root = get_base_dir()
    modules_root = os.path.join(root, MODULE_DIR)

    # Get a list of modules
    # TODO: how do we filter rocks, does it matter?
    module_names = moduleconfig.discover_module_directories(modules_root, None)

    repo_paths = [os.path.join(modules_root, name) for name in module_names]

    # The main repository always goes last
    repo_paths.append(root)

    return [Repo(repo_path) for repo_path in repo_paths]
+
class Repo(object):
    """Class encapsulates all knowledge about a git repository, and its metadata
    to run clang-format.

    Every git command issued through this class is pointed at the repository
    rooted at 'path' by passing --git-dir (and usually --work-tree) explicitly.
    """

    def __init__(self, path):
        """Remember 'path', the root directory of the repository."""
        self.path = path

    def _git_dir_args(self):
        """Return the start of a git command line that selects this repository's .git directory
        """
        return ['git', '--git-dir', os.path.join(self.path, ".git")]

    def _callgito(self, args):
        """Call git for this repository, and return the captured output

        Raises subprocess.CalledProcessError if git exits with a non-zero code.
        """
        # --git-dir and --work-tree are the equivalent of -C in newer versions of Git
        # but we use these to support versions pre 1.8.5 but it depends on the command
        # and what the current directory is
        if "ls-files" in args:
            # This command depends on the current directory and works better if not run with
            # work-tree
            return subprocess.check_output(self._git_dir_args() + args)

        return subprocess.check_output(self._git_dir_args() + ['--work-tree', self.path] + args)

    def _callgit(self, args):
        """Call git for this repository without capturing output, and return its exit code
        This is designed to be used when git returns non-zero exit codes.
        """
        # --git-dir and --work-tree are the equivalent of -C in newer versions of Git
        # but we use these to support versions pre 1.8.5
        return subprocess.call(self._git_dir_args() + ['--work-tree', self.path] + args)

    def _get_local_dir(self, path):
        """Get a directory path relative to the git root directory

        Absolute paths are made relative to the repository root; relative paths
        are only normalized.
        """
        if os.path.isabs(path):
            path = os.path.relpath(path, self.path)

        # Normalize Windows style paths to Unix style which git uses on all platforms
        return path.replace("\\", "/")

    def get_candidates(self, candidates, filter_function):
        """Get the set of candidate files to check by querying the repository

        If 'candidates' is None or empty, every file returned by
        get_candidate_files() is used.  Otherwise only the files that are both
        in 'candidates' and returned by get_candidate_files() are used; each is
        reported once, in the order git listed it, so that the result is
        deterministic.

        Returns the full path to the file for clang-format to consume.
        """
        tracked_files = self.get_candidate_files(filter_function)

        if candidates:
            wanted = set(self._get_local_dir(f) for f in candidates)

            valid_files = []
            for name in tracked_files:
                if name in wanted:
                    # Drop the name once used so a path git lists twice is only reported once
                    wanted.discard(name)
                    valid_files.append(name)
        else:
            valid_files = tracked_files

        # Get the full file name here
        return [os.path.normpath(os.path.join(self.path, f)) for f in valid_files]

    def _git_ls_files(self, cmd, filter_function):
        """Run git-ls-files and filter the list of files to a valid candidate list

        'cmd' is the git argument list to run; 'filter_function' is called with
        each line of output (trailing whitespace removed) and keeps the line
        when it returns a true value.
        """
        # This allows us to pick all the interesting files
        # in the mongo and mongo-enterprise repos
        file_list = []
        for line in self._callgito(cmd).splitlines():
            name = line.rstrip()
            if filter_function(name):
                file_list.append(name)

        return file_list

    def get_candidate_files(self, filter_function):
        """Query git to get a list of all files in the repo to consider for analysis
        """
        return self._git_ls_files(["ls-files", "--cached"], filter_function)

    def get_working_tree_candidate_files(self, filter_function):
        """Query git to get a list of all files in the working tree to consider for analysis
        """
        return self._git_ls_files(["ls-files", "--cached", "--others"], filter_function)

    def get_working_tree_candidates(self, filter_function):
        """Get the set of candidate files to check by querying the repository

        Returns the full path to the file for clang-format to consume.
        """
        # Get the full file name here
        valid_files = [os.path.normpath(os.path.join(self.path, f))
                       for f in self.get_working_tree_candidate_files(filter_function)]

        # Filter out files that git thinks exist but were removed.
        return [f for f in valid_files if os.path.exists(f)]

    def is_detached(self):
        """Is the current working tree in a detached HEAD state?
        """
        # symbolic-ref returns 1 if the repo is in a detached HEAD state; any
        # non-zero exit code is reported as detached.
        return bool(self._callgit(["symbolic-ref", "--quiet", "HEAD"]))

    def is_ancestor(self, parent, child):
        """Is the specified parent hash an ancestor of child hash?
        """
        # merge base returns 0 if parent is an ancestor of child
        return not self._callgit(["merge-base", "--is-ancestor", parent, child])

    def is_commit(self, sha1):
        """Is the specified hash a valid git commit?
        """
        # cat-file -e returns 0 if it is a valid hash
        return not self._callgit(["cat-file", "-e", "%s^{commit}" % sha1])

    def is_working_tree_dirty(self):
        """Does the current working tree have changes?
        """
        # diff returns 1 if the working tree has local changes; any non-zero
        # exit code is reported as dirty.
        return bool(self._callgit(["diff", "--quiet"]))

    def does_branch_exist(self, branch):
        """Does the branch exist?
        """
        # rev-parse returns 0 if the branch exists
        return not self._callgit(["rev-parse", "--verify", branch])

    def get_merge_base(self, commit):
        """Get the merge base between 'commit' and HEAD
        """
        return self._callgito(["merge-base", "HEAD", commit]).rstrip()

    def get_branch_name(self):
        """Get the current branch name, short form
        This returns "master", not "refs/head/master"
        Will not work if the current branch is detached

        Raises ValueError if the branch is detached.
        """
        branch = self.rev_parse(["--abbrev-ref", "HEAD"])
        if branch == "HEAD":
            raise ValueError("Branch is currently detached")

        return branch

    def add(self, command):
        """git add wrapper; 'command' is the list of extra arguments
        """
        return self._callgito(["add"] + command)

    def checkout(self, command):
        """git checkout wrapper; 'command' is the list of extra arguments
        """
        return self._callgito(["checkout"] + command)

    def commit(self, command):
        """git commit wrapper; 'command' is the list of extra arguments
        """
        return self._callgito(["commit"] + command)

    def diff(self, command):
        """git diff wrapper; 'command' is the list of extra arguments
        """
        return self._callgito(["diff"] + command)

    def log(self, command):
        """git log wrapper; 'command' is the list of extra arguments
        """
        return self._callgito(["log"] + command)

    def rev_parse(self, command):
        """git rev-parse wrapper; returns the output with trailing whitespace removed
        """
        return self._callgito(["rev-parse"] + command).rstrip()

    def rm(self, command):
        """git rm wrapper; 'command' is the list of extra arguments
        """
        return self._callgito(["rm"] + command)

    def show(self, command):
        """git show wrapper; 'command' is the list of extra arguments
        """
        return self._callgito(["show"] + command)
+
+
def expand_file_string(glob_pattern):
    """Expand a string that represents a set of files

    Returns a list with the absolute form of every path that
    globstar.iglob() yields for 'glob_pattern'.
    """
    matches = globstar.iglob(glob_pattern)
    return list(map(os.path.abspath, matches))
+
def get_files_to_check_working_tree(filter_function):
    """Get a list of files to check from the working tree.
    This will pick up files not managed by git.

    The working tree candidates of every repository returned by get_repos()
    are concatenated in repository order.
    """
    valid_files = []
    for repo in get_repos():
        valid_files.extend(repo.get_working_tree_candidates(filter_function))

    return valid_files
+
def get_files_to_check(files, filter_function):
    """Get a list of files that need to be checked
    based on which files are managed by git.

    'files' is a sequence of glob strings; each is expanded with
    expand_file_string().  When 'files' is empty, no candidate list is passed
    on and every repository reports all of its files accepted by
    'filter_function'.

    Raises ValueError if globs were given but none of them matched a file.
    """
    # Get a list of candidate_files
    candidates = list(itertools.chain.from_iterable(expand_file_string(f) for f in files))

    if files and not candidates:
        # Wrap 'files' in a one-tuple so that it is always formatted as a single value;
        # a bare tuple would otherwise be unpacked as the format arguments.
        raise ValueError("Globs '%s' did not find any files." % (files,))

    repos = get_repos()

    valid_files = list(itertools.chain.from_iterable(
        [r.get_candidates(candidates, filter_function) for r in repos]))

    return valid_files
+
def get_files_to_check_from_patch(patches, filter_function):
    """Take a patch file generated by git diff, and scan the patch for a list of files to check.

    'patches' is a sequence of patch file names.  The "a/" path of every
    "diff --git" header line found in them becomes a candidate, and the
    candidates are then handed to each repository returned by get_repos().
    """
    # Matches the header line of each file in the patch and captures the "a/" path
    diff_header = re.compile(r"^diff --git a\/([\w\/\.\-]+) b\/[\w\/\.\-]+")

    # Get a list of candidate_files
    candidates = []
    for patch in patches:
        with open(patch, "rb") as infile:
            for line in infile.readlines():
                found = diff_header.match(line)
                if found:
                    candidates.append(found.group(1))

    valid_files = []
    for repo in get_repos():
        valid_files.extend(repo.get_candidates(candidates, filter_function))

    return valid_files
diff --git a/buildscripts/linter/parallel.py b/buildscripts/linter/parallel.py
new file mode 100644
index 00000000000..95fee2c7c3b
--- /dev/null
+++ b/buildscripts/linter/parallel.py
@@ -0,0 +1,70 @@
+from __future__ import absolute_import
+from __future__ import print_function
+
+import Queue
+import threading
+import time
+from multiprocessing import cpu_count
+
def parallel_process(items, func):
    """Run a set of work items to completion

    'func' is called once per item from a pool of worker threads, one thread
    per CPU reported by cpu_count() (a single thread if that is unavailable).

    Returns True if every processed item made 'func' return a true value.
    Returns False if 'func' returned a false value or raised an exception for
    some item; in that case the workers stop picking up new items, so the
    remaining items may never be processed.
    """
    # Imported locally because only the failure path of the workers needs it
    import traceback

    try:
        cpus = cpu_count()
    except NotImplementedError:
        cpus = 1

    task_queue = Queue.Queue()

    # Set when the workers should stop: the queue ran dry or an item failed
    pp_event = threading.Event()
    # Use a list so that the worker function can modify the captured result
    pp_result = [True]
    pp_lock = threading.Lock()

    def worker():
        """Worker thread to process work items in parallel
        """
        while not pp_event.is_set():
            try:
                item = task_queue.get_nowait()
            except Queue.Empty:
                # if the queue is empty, exit the worker thread
                pp_event.set()
                return

            try:
                ret = func(item)
            except Exception:
                # Treat an exception like a failed item.  Letting it escape would kill this
                # thread without recording the failure, and if every worker died that way
                # the event would never be set and the caller would wait forever.
                traceback.print_exc()
                ret = False
            finally:
                # Tell the queue we finished with the item
                task_queue.task_done()

            # Return early if we fail, and signal we are done
            if not ret:
                with pp_lock:
                    pp_result[0] = False

                pp_event.set()
                return

    # Enqueue all the work we want to process
    for item in items:
        task_queue.put(item)

    # Process all the work
    threads = []
    for _ in range(cpus):
        thread = threading.Thread(target=worker)

        thread.daemon = True
        thread.start()
        threads.append(thread)

    # Wait for the threads to finish
    # Wait with a timeout, in a loop, so that we can process Ctrl-C interrupts; the timeout
    # already paces the loop, so no additional sleep is needed.
    while not pp_event.wait(1):
        pass

    # A worker may still be finishing its current item after the event is set
    for thread in threads:
        thread.join()

    return pp_result[0]