diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2017-03-28 11:08:01 -0400 |
---|---|---|
committer | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2017-03-28 11:08:01 -0400 |
commit | 00ee4f5156348477b9dd3f71b747104794f766c0 (patch) | |
tree | 80e1a2a5aa5297038e727004a9ed6f7ab10ed076 /buildscripts/clang_format.py | |
parent | 1114c0c3fdbc1e766b50bbd17a8cfc2606528335 (diff) | |
download | mongo-00ee4f5156348477b9dd3f71b747104794f766c0.tar.gz |
SERVER-28465: Refactor eslint.py and clang_format.py
Diffstat (limited to 'buildscripts/clang_format.py')
-rwxr-xr-x | buildscripts/clang_format.py | 361 |
1 files changed, 18 insertions, 343 deletions
diff --git a/buildscripts/clang_format.py b/buildscripts/clang_format.py index c3910f6b1c3..cf9884d8b15 100755 --- a/buildscripts/clang_format.py +++ b/buildscripts/clang_format.py @@ -9,10 +9,8 @@ A script that provides: """ from __future__ import print_function, absolute_import -import Queue import difflib import glob -import itertools import os import re import shutil @@ -22,7 +20,6 @@ import sys import tarfile import tempfile import threading -import time import urllib2 from distutils import spawn from optparse import OptionParser @@ -32,8 +29,8 @@ from multiprocessing import cpu_count if __name__ == "__main__" and __package__ is None: sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.path.realpath(__file__))))) -from buildscripts import moduleconfig - +from buildscripts.linter import git +from buildscripts.linter import parallel ############################################################################## # @@ -57,10 +54,6 @@ CLANG_FORMAT_HTTP_DARWIN_CACHE = "https://s3.amazonaws.com/boxes.10gen.com/build # Path in the tarball to the clang-format binary CLANG_FORMAT_SOURCE_TAR_BASE = string.Template("clang+llvm-$version-$tar_path/bin/" + CLANG_FORMAT_PROGNAME) -# Path to the modules in the mongodb source tree -# Has to match the string in SConstruct -MODULE_DIR = "src/mongo/db/modules" - ############################################################################## def callo(args): """Call a program, and capture its output @@ -282,350 +275,31 @@ class ClangFormat(object): return formatted +files_re = re.compile('\\.(h|cpp|js)$') -def parallel_process(items, func): - """Run a set of work items to completion - """ - try: - cpus = cpu_count() - except NotImplementedError: - cpus = 1 - - task_queue = Queue.Queue() - - # Use a list so that worker function will capture this variable - pp_event = threading.Event() - pp_result = [True] - pp_lock = threading.Lock() - - def worker(): - """Worker thread to process work items in parallel - """ - while not pp_event.is_set(): - try: - item = task_queue.get_nowait() - except Queue.Empty: - # if the queue is empty, exit the worker thread - pp_event.set() - return - - try: - ret = func(item) - finally: - # Tell the queue we finished with the item - task_queue.task_done() - - # Return early if we fail, and signal we are done - if not ret: - with pp_lock: - pp_result[0] = False - - pp_event.set() - return - - # Enqueue all the work we want to process - for item in items: - task_queue.put(item) - - # Process all the work - threads = [] - for cpu in range(cpus): - thread = threading.Thread(target=worker) - - thread.daemon = True - thread.start() - threads.append(thread) - - # Wait for the threads to finish - # Loop with a timeout so that we can process Ctrl-C interrupts - # Note: On Python 2.6 wait always returns None so we check is_set also, - # This works because we only set the event once, and never reset it - while not pp_event.wait(1) and not pp_event.is_set(): - time.sleep(1) - - for thread in threads: - thread.join() - - return pp_result[0] - -def get_base_dir(): - """Get the base directory for mongo repo. - This script assumes that it is running in buildscripts/, and uses - that to find the base directory. - """ - try: - return subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).rstrip() - except: - # We are not in a valid git directory. Use the script path instead. - return os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - -def get_repos(): - """Get a list of Repos to check clang-format for - """ - base_dir = get_base_dir() - - # Get a list of modules - # TODO: how do we filter rocks, does it matter? - mongo_modules = moduleconfig.discover_module_directories( - os.path.join(base_dir, MODULE_DIR), None) - - paths = [os.path.join(base_dir, MODULE_DIR, m) for m in mongo_modules] - - paths.append(base_dir) - - return [Repo(p) for p in paths] - - -class Repo(object): - """Class encapsulates all knowledge about a git repository, and its metadata - to run clang-format. +def is_interesting_file(file_name): + """"Return true if this file should be checked """ - def __init__(self, path): - self.path = path - - self.root = self._get_root() - - def _callgito(self, args): - """Call git for this repository, and return the captured output - """ - # These two flags are the equivalent of -C in newer versions of Git - # but we use these to support versions pre 1.8.5 but it depends on the command - # and what the current directory is - return callo(['git', '--git-dir', os.path.join(self.path, ".git"), - '--work-tree', self.path] + args) - - def _callgit(self, args): - """Call git for this repository without capturing output - This is designed to be used when git returns non-zero exit codes. - """ - # These two flags are the equivalent of -C in newer versions of Git - # but we use these to support versions pre 1.8.5 but it depends on the command - # and what the current directory is - return subprocess.call(['git', '--git-dir', os.path.join(self.path, ".git"), - '--work-tree', self.path] + args) - - def _get_local_dir(self, path): - """Get a directory path relative to the git root directory - """ - if os.path.isabs(path): - return os.path.relpath(path, self.root) - return path - - def get_candidates(self, candidates): - """Get the set of candidate files to check by querying the repository - - Returns the full path to the file for clang-format to consume. - """ - if candidates is not None and len(candidates) > 0: - candidates = [self._get_local_dir(f) for f in candidates] - valid_files = list(set(candidates).intersection(self.get_candidate_files())) - else: - valid_files = list(self.get_candidate_files()) - - # Get the full file name here - valid_files = [os.path.normpath(os.path.join(self.root, f)) for f in valid_files] - - return valid_files - - def get_root(self): - """Get the root directory for this repository - """ - return self.root - - def _get_root(self): - """Gets the root directory for this repository from git - """ - gito = self._callgito(['rev-parse', '--show-toplevel']) - - return gito.rstrip() - - def _git_ls_files(self, cmd): - """Run git-ls-files and filter the list of files to a valid candidate list - """ - gito = self._callgito(cmd) - - # This allows us to pick all the interesting files - # in the mongo and mongo-enterprise repos - file_list = [line.rstrip() - for line in gito.splitlines() - if (line.startswith("jstests") or line.startswith("src")) - and not line.startswith("src/third_party/") - and not line.startswith("src/mongo/gotools/")] - - files_match = re.compile('\\.(h|cpp|js)$') - - file_list = [a for a in file_list if files_match.search(a)] - - return file_list - - def get_candidate_files(self): - """Query git to get a list of all files in the repo to consider for analysis - """ - return self._git_ls_files(["ls-files", "--cached"]) - - def get_working_tree_candidate_files(self): - """Query git to get a list of all files in the working tree to consider for analysis - """ - return self._git_ls_files(["ls-files", "--cached", "--others"]) - - def get_working_tree_candidates(self): - """Get the set of candidate files to check by querying the repository - - Returns the full path to the file for clang-format to consume. - """ - valid_files = list(self.get_working_tree_candidate_files()) - - # Get the full file name here - valid_files = [os.path.normpath(os.path.join(self.root, f)) for f in valid_files] - - # Filter out files that git thinks exist but were removed. - valid_files = [f for f in valid_files if os.path.exists(f)] - - return valid_files - - def is_detached(self): - """Is the current working tree in a detached HEAD state? - """ - # symbolic-ref returns 1 if the repo is in a detached HEAD state - return self._callgit(["symbolic-ref", "--quiet", "HEAD"]) - - def is_ancestor(self, parent, child): - """Is the specified parent hash an ancestor of child hash? - """ - # merge base returns 0 if parent is an ancestor of child - return not self._callgit(["merge-base", "--is-ancestor", parent, child]) - - def is_commit(self, sha1): - """Is the specified hash a valid git commit? - """ - # cat-file -e returns 0 if it is a valid hash - return not self._callgit(["cat-file", "-e", "%s^{commit}" % sha1]) - - def is_working_tree_dirty(self): - """Does the current working tree have changes? - """ - # diff returns 1 if the working tree has local changes - return self._callgit(["diff", "--quiet"]) - - def does_branch_exist(self, branch): - """Does the branch exist? - """ - # rev-parse returns 0 if the branch exists - return not self._callgit(["rev-parse", "--verify", branch]) - - def get_merge_base(self, commit): - """Get the merge base between 'commit' and HEAD - """ - return self._callgito(["merge-base", "HEAD", commit]).rstrip() - - def get_branch_name(self): - """Get the current branch name, short form - This returns "master", not "refs/head/master" - Will not work if the current branch is detached - """ - branch = self.rev_parse(["--abbrev-ref", "HEAD"]) - if branch == "HEAD": - raise ValueError("Branch is currently detached") - - return branch - - def add(self, command): - """git add wrapper - """ - return self._callgito(["add"] + command) - - def checkout(self, command): - """git checkout wrapper - """ - return self._callgito(["checkout"] + command) - - def commit(self, command): - """git commit wrapper - """ - return self._callgito(["commit"] + command) - - def diff(self, command): - """git diff wrapper - """ - return self._callgito(["diff"] + command) - - def log(self, command): - """git log wrapper - """ - return self._callgito(["log"] + command) - - def rev_parse(self, command): - """git rev-parse wrapper - """ - return self._callgito(["rev-parse"] + command).rstrip() - - def rm(self, command): - """git rm wrapper - """ - return self._callgito(["rm"] + command) - - def show(self, command): - """git show wrapper - """ - return self._callgito(["show"] + command) + return ((file_name.startswith("jstests") or file_name.startswith("src")) + and not file_name.startswith("src/third_party/") + and not file_name.startswith("src/mongo/gotools/")) and files_re.search(file_name) def get_list_from_lines(lines): """"Convert a string containing a series of lines into a list of strings """ return [line.rstrip() for line in lines.splitlines()] -def get_files_to_check_working_tree(): - """Get a list of files to check form the working tree. - This will pick up files not managed by git. - """ - repos = get_repos() - - valid_files = list(itertools.chain.from_iterable([r.get_working_tree_candidates() for r in repos])) - - return valid_files - -def get_files_to_check(): - """Get a list of files that need to be checked - based on which files are managed by git. - """ - repos = get_repos() - - valid_files = list(itertools.chain.from_iterable([r.get_candidates(None) for r in repos])) - - return valid_files - -def get_files_to_check_from_patch(patches): - """Take a patch file generated by git diff, and scan the patch for a list of files to check. - """ - candidates = [] - - # Get a list of candidate_files - check = re.compile(r"^diff --git a\/([\w\/\.\-]+) b\/[\w\/\.\-]+") - - lines = [] - for patch in patches: - with open(patch, "rb") as infile: - lines += infile.readlines() - - candidates = [check.match(line).group(1) for line in lines if check.match(line)] - - repos = get_repos() - - valid_files = list(itertools.chain.from_iterable([r.get_candidates(candidates) for r in repos])) - - return valid_files - def _get_build_dir(): """Get the location of the scons' build directory in case we need to download clang-format """ - return os.path.join(get_base_dir(), "build") + return os.path.join(git.get_base_dir(), "build") def _lint_files(clang_format, files): """Lint a list of files with clang-format """ clang_format = ClangFormat(clang_format, _get_build_dir()) - lint_clean = parallel_process([os.path.abspath(f) for f in files], clang_format.lint) + lint_clean = parallel.parallel_process([os.path.abspath(f) for f in files], clang_format.lint) if not lint_clean: print("ERROR: Code Style does not match coding style") @@ -634,7 +308,7 @@ def _lint_files(clang_format, files): def lint_patch(clang_format, infile): """Lint patch command entry point """ - files = get_files_to_check_from_patch(infile) + files = git.get_files_to_check_from_patch(infile, is_interesting_file) # Patch may have files that we do not want to check which is fine if files: @@ -643,7 +317,7 @@ def lint_patch(clang_format, infile): def lint(clang_format): """Lint files command entry point """ - files = get_files_to_check() + files = git.get_files_to_check([], is_interesting_file) _lint_files(clang_format, files) @@ -652,7 +326,7 @@ def lint(clang_format): def lint_all(clang_format): """Lint files command entry point based on working tree """ - files = get_files_to_check_working_tree() + files = git.get_files_to_check_working_tree(is_interesting_file) _lint_files(clang_format, files) @@ -663,7 +337,8 @@ def _format_files(clang_format, files): """ clang_format = ClangFormat(clang_format, _get_build_dir()) - format_clean = parallel_process([os.path.abspath(f) for f in files], clang_format.format) + format_clean = parallel.parallel_process([os.path.abspath(f) for f in files], + clang_format.format) if not format_clean: print("ERROR: failed to format files") @@ -672,7 +347,7 @@ def _format_files(clang_format, files): def format_func(clang_format): """Format files command entry point """ - files = get_files_to_check() + files = git.get_files_to_check([], is_interesting_file) _format_files(clang_format, files) @@ -681,13 +356,13 @@ def reformat_branch(clang_format, commit_prior_to_reformat, commit_after_reforma """ clang_format = ClangFormat(clang_format, _get_build_dir()) - if os.getcwd() != get_base_dir(): + if os.getcwd() != git.get_base_dir(): raise ValueError("reformat-branch must be run from the repo root") if not os.path.exists("buildscripts/clang_format.py"): raise ValueError("reformat-branch is only supported in the mongo repo") - repo = Repo(get_base_dir()) + repo = git.Repo(git.get_base_dir()) # Validate that user passes valid commits if not repo.is_commit(commit_prior_to_reformat): |