diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2015-06-11 11:22:53 -0400 |
---|---|---|
committer | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2015-06-20 10:55:52 -0400 |
commit | 01965cf52bce6976637ecb8f4a622aeb05ab256a (patch) | |
tree | a4641b27e879a47adbeaac435efd9ecda80971e9 | |
parent | 1178eff1ddf60a7dac3a8cd71e2fdd278aa01b52 (diff) | |
download | mongo-01965cf52bce6976637ecb8f4a622aeb05ab256a.tar.gz |
SERVER-18575: clang_format.py
-rwxr-xr-x | buildscripts/clang_format.py | 592 | ||||
-rw-r--r-- | buildscripts/moduleconfig.py | 33 | ||||
-rw-r--r-- | buildscripts/resmokelib/utils/globstar.py | 3 |
3 files changed, 627 insertions, 1 deletions
diff --git a/buildscripts/clang_format.py b/buildscripts/clang_format.py new file mode 100755 index 00000000000..f0473c61c01 --- /dev/null +++ b/buildscripts/clang_format.py @@ -0,0 +1,592 @@ +#! /usr/bin/env python +""" +A script that provides: +1. Ability to grab binaries where possible from LLVM. +2. Ability to download binaries from MongoDB cache for clang-format. +3. Validates clang-format is the right version. +4. Has support for checking which files are to be checked. +5. Supports validating and updating a set of files to the right coding style. +""" +from __future__ import print_function, absolute_import + +import Queue +import difflib +import itertools +import os +import re +import shutil +import string +import subprocess +import sys +import tempfile +import threading +import time +import urllib +from distutils import spawn +from optparse import OptionParser +from multiprocessing import cpu_count + +# Get relative imports to work when the package is not installed on the PYTHONPATH. +if __name__ == "__main__" and __package__ is None: + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from buildscripts.resmokelib.utils import globstar +from buildscripts import moduleconfig + + +############################################################################## +# +# Constants for clang-format +# +# + +# Expected version of clang-format +CLANG_FORMAT_VERSION = "3.6.0" + +# Name of clang-format as a binary +CLANG_FORMAT_PROGNAME = "clang-format" + +# URL location of the "cached" copy of clang-format to download +# for users which do not have clang-format installed +CLANG_FORMAT_HTTP_LINUX_CACHE = "https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-rhel55.tar.gz" + +# URL on LLVM's website to download the clang tarball +CLANG_FORMAT_SOURCE_URL_BASE = string.Template("http://llvm.org/releases/$version/clang+llvm-$version-$llvm_distro.tar.xz") + +# Path in the tarball to the clang-format binary +CLANG_FORMAT_SOURCE_TAR_BASE = string.Template("clang+llvm-$version-$tar_path/bin/" + CLANG_FORMAT_PROGNAME) + +# Path to the modules in the mongodb source tree +# Has to match the string in SConstruct +MODULE_DIR = "src/mongo/db/modules" + +# Copied from python 2.7 version of subprocess.py +# Exception classes used by this module. +class CalledProcessError(Exception): + """This exception is raised when a process run by check_call() or + check_output() returns a non-zero exit status. + The exit status will be stored in the returncode attribute; + check_output() will also store the output in the output attribute. + """ + def __init__(self, returncode, cmd, output=None): + self.returncode = returncode + self.cmd = cmd + self.output = output + def __str__(self): + return ("Command '%s' returned non-zero exit status %d with output %s" % + (self.cmd, self.returncode, self.output)) + + +# Copied from python 2.7 version of subprocess.py +def check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... stderr=STDOUT) + 'ls: non_existent_file: No such file or directory\n' + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + raise CalledProcessError(retcode, cmd, output) + return output + +def callo(args): + """Call a program, and capture its output + """ + return check_output(args) + +def get_llvm_url(version, llvm_distro): + """Get the url to download clang-format from llvm.org + """ + return CLANG_FORMAT_SOURCE_URL_BASE.substitute( + version=version, + llvm_distro=llvm_distro) + +def get_tar_path(version, tar_path): + """ Get the path to clang-format in the llvm tarball + """ + return CLANG_FORMAT_SOURCE_TAR_BASE.substitute( + version=version, + tar_path=tar_path) + +def get_clang_format_from_llvm(llvm_distro, tar_path, dest_file): + """Download clang-format from llvm.org, unpack the tarball, + and put clang-format in the specified place + """ + # Build URL + url = get_llvm_url(CLANG_FORMAT_VERSION, llvm_distro) + + dest_dir = tempfile.gettempdir() + temp_tar_file = os.path.join(dest_dir, "temp.tar.xz") + + # Download from LLVM + print("Downloading clang-format %s from %s, saving to %s" % (CLANG_FORMAT_VERSION, + url, temp_tar_file)) + urllib.urlretrieve(url, temp_tar_file) + + # Extract just clang format file + subprocess.call(['tar', 'zxvf', temp_tar_file, '*clang-format*']) + + # Destination Path + shutil.move(get_tar_path(CLANG_FORMAT_VERSION, tar_path), dest_file) + +def get_clang_format_from_linux_cache(dest_file): + """Get clang-format from mongodb's cache + """ + # Get URL + url = CLANG_FORMAT_HTTP_LINUX_CACHE + + dest_dir = tempfile.gettempdir() + temp_tar_file = os.path.join(dest_dir, "temp.tar.xz") + + # Download the file + print("Downloading clang-format %s from %s, saving to %s" % (CLANG_FORMAT_VERSION, + url, temp_tar_file)) + urllib.urlretrieve(url, temp_tar_file) + + # Extract just clang format file + subprocess.call(['tar', 'zxvf', temp_tar_file, '*clang-format*']) + + # Destination Path + shutil.move("llvm/Release/bin/clang-format", dest_file) + + +class ClangFormat(object): + """Class encapsulates finding a suitable copy of clang-format, + and linting/formating an individual file + """ + def __init__(self, path, cache_dir): + clang_format_progname = CLANG_FORMAT_PROGNAME + + # Initialize clang-format configuration information + if sys.platform.startswith("linux"): + #"3.6.0/clang+llvm-3.6.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz + self.platform = "linux_x64" + self.llvm_distro = "x86_64-linux-gnu-ubuntu" + self.tar_path = "x86_64-linux-gnu" + elif sys.platform == "win32": + self.platform = "windows_x64" + self.llvm_distro = "windows_x64" + self.tar_path = None + clang_format_progname += ".exe" + elif sys.platform == "darwin": + #"3.6.0/clang+llvm-3.6.0-x86_64-apple-darwin.tar.xz + self.platform = "darwin_x64" + self.llvm_distro = "x86_64-apple-darwin" + self.tar_path = "x86_64-apple-darwin" + + self.path = None + + # Find Clang-Format now + if path is not None: + if os.path.isfile(path): + self.path = path + else: + print("WARNING: Could not find clang-format %s" % (path)) + + # Check the envionrment variable + if "MONGO_CLANG_FORMAT" in os.environ: + self.path = os.environ["MONGO_CLANG_FORMAT"] + + if self.path and not self._validate_version(warn=True): + self.path = None + + # Check the users' PATH environment variable now + if self.path is None: + self.path = spawn.find_executable(clang_format_progname) + + if self.path and not self._validate_version(warn=True): + self.path = None + + # If Windows, try to grab it from Program Files + if sys.platform == "win32": + win32bin = os.path.join(os.environ["ProgramFiles(x86)"], "LLVM\\bin\\clang-format.exe") + if os.path.exists(win32bin): + self.path = win32bin + + # Have not found it yet, download it from the web + if self.path is None: + if not os.path.isdir(cache_dir): + os.makedirs(cache_dir) + + self.path = os.path.join(cache_dir, clang_format_progname) + + if not os.path.isfile(self.path): + if sys.platform.startswith("linux"): + get_clang_format_from_linux_cache(self.path) + elif sys.platform == "darwin": + get_clang_format_from_llvm(self.llvm_distro, self.tar_path, self.path) + else: + print("ERROR: clang-format.py does not support downloading clang-format " + + " on this platform, please install clang-format " + CLANG_FORMAT_VERSION) + + # Validate we have the correct version + self._validate_version() + + self.print_lock = threading.Lock() + + def _validate_version(self, warn=False): + """Validate clang-format is the expected version + """ + cf_version = callo([self.path, "--version"]) + + if CLANG_FORMAT_VERSION in cf_version: + return True + + if warn: + print("WARNING: clang-format found in path, but incorrect version found at " + + self.path + " with version: " + cf_version) + + return False + + def lint(self, file_name): + """Check the specified file has the correct format + """ + with open(file_name, 'r') as original_text: + original_file = original_text.read() + + # Get formatted file as clang-format would format the file + formatted_file = callo([self.path, "--style=file", file_name]) + + if original_file != formatted_file: + + original_lines = original_file.splitlines() + formatted_lines = formatted_file.splitlines() + result = difflib.unified_diff(original_lines, formatted_lines) + + # Take a lock to ensure diffs do not get mixed + with self.print_lock: + print("ERROR: Found diff for " + file_name) + print("To fix formatting errors, run %s --style=file -i %s" % + (self.path, file_name)) + for line in result: + print(line.rstrip()) + + return False + + return True + + def format(self, file_name): + """Update the format of the specified file + """ + + # Update the file with clang-format + return not subprocess.call([self.path, "--style=file", "-i", file_name]) + + +def parallel_process(items, func): + """Run a set of work items to completion + """ + try: + cpus = cpu_count() + except NotImplementedError: + cpus = 1 + + print("Running across %d cpus" % (cpus)) + + task_queue = Queue.Queue() + + # Use a list so that worker function will capture this variable + pp_event = threading.Event() + pp_result = [True] + pp_lock = threading.Lock() + + def worker(): + """Worker thread to process work items in parallel + """ + while not pp_event.is_set(): + try: + item = task_queue.get_nowait() + except Queue.Empty: + # if the queue is empty, exit the worker thread + pp_event.set() + return + + try: + ret = func(item) + finally: + # Tell the queue we finished with the item + task_queue.task_done() + + # Return early if we fail, and signal we are done + if not ret: + with pp_lock: + pp_result[0] = False + + pp_event.set() + return + + # Enqueue all the work we want to process + for item in items: + task_queue.put(item) + + # Process all the work + threads = [] + for cpu in range(cpus): + thread = threading.Thread(target=worker) + + thread.daemon = True + thread.start() + threads.append(thread) + + # Wait for the threads to finish + # Loop with a timeout so that we can process Ctrl-C interrupts + # Note: On Python 2.6 wait always returns None so we check is_set also, + # This works because we only set the event once, and never reset it + while not pp_event.wait(1) and not pp_event.is_set(): + time.sleep(1) + + for thread in threads: + thread.join() + + return pp_result[0] + +def get_base_dir(): + """Get the base directory for mongo repo. + This script assumes that it is running in buildscripts/, and uses + that to find the base directory. + """ + script_path = os.path.dirname(os.path.realpath(__file__)) + + return os.path.dirname(script_path) + +def get_repos(): + """Get a list of Repos to check clang-format for + """ + base_dir = get_base_dir() + + # Get a list of modules + # TODO: how do we filter rocks, does it matter? + mongo_modules = moduleconfig.discover_module_directories( + os.path.join(base_dir, MODULE_DIR), None) + + paths = [os.path.join(base_dir, MODULE_DIR, m) for m in mongo_modules] + + paths.append(base_dir) + + return [Repo(p) for p in paths] + + +class Repo(object): + """Class encapsulates all knowledge about a git repository, and its metadata + to run clang-format. + """ + def __init__(self, path): + self.path = path + + # Get candidate files + self.candidate_files = self._get_candidate_files() + + self.root = self._get_root() + + def _callgito(self, args): + """Call git for this repository + """ + # These two flags are the equivalent of -C in newer versions of Git + # but we use these to support versions back to ~1.8 + return callo(['git', '--git-dir', os.path.join(self.path, ".git"), + '--work-tree', self.path] + args) + + def _get_local_dir(self, path): + """Get a directory path relative to the git root directory + """ + if os.path.isabs(path): + return os.path.relpath(path, self.root) + return path + + def get_candidates(self, candidates): + """Get the set of candidate files to check by doing an intersection + between the input list, and the list of candidates in the repository + + Returns the full path to the file for clang-format to consume. + """ + # NOTE: Files may have an absolute root (i.e. leading /) + + if candidates is not None and len(candidates) > 0: + candidates = [self._get_local_dir(f) for f in candidates] + valid_files = list(set(candidates).intersection(self.get_candidate_files())) + else: + valid_files = list(self.get_candidate_files()) + + # Get the full file name here + valid_files = [os.path.normpath(os.path.join(self.root, f)) for f in valid_files] + return valid_files + + def get_root(self): + """Get the root directory for this repository + """ + return self.root + + def _get_root(self): + """Gets the root directory for this repository from git + """ + gito = self._callgito(['rev-parse', '--show-toplevel']) + + return gito.rstrip() + + def get_candidate_files(self): + """Get a list of candidate files + """ + return self._get_candidate_files() + + def _get_candidate_files(self): + """Query git to get a list of all files in the repo to consider for analysis + """ + gito = self._callgito(["ls-files"]) + + # This allows us to pick all the interesting files + # in the mongo and mongo-enterprise repos + file_list = [line.rstrip() + for line in gito.splitlines() if "src" in line and not "src/third_party" in line] + + files_match = re.compile('\\.(h|cpp)$') + + file_list = [a for a in file_list if files_match.search(a)] + + return file_list + + +def expand_file_string(glob_pattern): + """Expand a string that represents a set of files + """ + return [os.path.abspath(f) for f in globstar.iglob(glob_pattern)] + +def get_files_to_check(files): + """Filter the specified list of files to check down to the actual + list of files that need to be checked.""" + candidates = [] + + # Get a list of candidate_files + candidates = [expand_file_string(f) for f in files] + candidates = list(itertools.chain.from_iterable(candidates)) + + repos = get_repos() + + valid_files = list(itertools.chain.from_iterable([r.get_candidates(candidates) for r in repos])) + + return valid_files + +def get_files_to_check_from_patch(patches): + """Take a patch file generated by git diff, and scan the patch for a list of files to check. + """ + candidates = [] + + # Get a list of candidate_files + check = re.compile(r"^diff --git a\/([a-z\/\.\-_0-9]+) b\/[a-z\/\.\-_0-9]+") + + lines = [] + for patch in patches: + with open(patch, "rb") as infile: + lines += infile.readlines() + + candidates = [check.match(line).group(1) for line in lines if check.match(line)] + + repos = get_repos() + + valid_files = list(itertools.chain.from_iterable([r.get_candidates(candidates) for r in repos])) + + return valid_files + +def _get_build_dir(): + """Get the location of the scons' build directory in case we need to download clang-format + """ + return os.path.join(get_base_dir(), "build") + +def _lint_files(clang_format, files): + """Lint a list of files with clang-format + """ + clang_format = ClangFormat(clang_format, _get_build_dir()) + + lint_clean = parallel_process([os.path.abspath(f) for f in files], clang_format.lint) + + if not lint_clean: + print("ERROR: Code Style does not match coding style") + sys.exit(1) + +def lint_patch(clang_format, infile): + """Lint patch command entry point + """ + files = get_files_to_check_from_patch(infile) + + # Patch may have files that we do not want to check which is fine + if files: + _lint_files(clang_format, files) + +def lint(clang_format, glob): + """Lint files command entry point + """ + files = get_files_to_check(glob) + + _lint_files(clang_format, files) + + return True + +def _format_files(clang_format, files): + """Format a list of files with clang-format + """ + clang_format = ClangFormat(clang_format, _get_build_dir()) + + format_clean = parallel_process([os.path.abspath(f) for f in files], clang_format.format) + + if not format_clean: + print("ERROR: failed to format files") + sys.exit(1) + +def format_func(clang_format, glob): + """Format files command entry point + """ + files = get_files_to_check(glob) + + _format_files(clang_format, files) + +def usage(): + """Print usage + """ + print("clang-format.py supports 3 commands [ lint, lint-patch, format ]. Run " + " <command> -? for more information") + +def main(): + """Main entry point + """ + if len(sys.argv) > 1: + command = sys.argv[1] + + parser = OptionParser() + parser.add_option("-c", "--clang-format", type="string", dest="clang_format") + + if command == "lint": + (options, args) = parser.parse_args(args=sys.argv[2:]) + lint(options.clang_format, args) + elif command == "lint-patch": + (options, args) = parser.parse_args(args=sys.argv[2:]) + lint_patch(options.clang_format, args) + elif command == "format": + (options, args) = parser.parse_args(args=sys.argv[2:]) + format_func(options.clang_format, args) + else: + usage() + else: + usage() + +if __name__ == "__main__": + main() diff --git a/buildscripts/moduleconfig.py b/buildscripts/moduleconfig.py index 111b49545e5..7f98667b6b5 100644 --- a/buildscripts/moduleconfig.py +++ b/buildscripts/moduleconfig.py @@ -24,7 +24,8 @@ MongoDB module SConscript files can describe libraries, programs and unit tests, MongoDB SConscript files do. """ -__all__ = ('discover_modules', 'configure_modules', 'register_module_test') +__all__ = ('discover_modules', 'discover_module_directories', 'configure_modules', + 'register_module_test') import imp import inspect @@ -69,6 +70,36 @@ def discover_modules(module_root, allowed_modules): return found_modules +def discover_module_directories(module_root, allowed_modules): + """Scans module_root for subdirectories that look like MongoDB modules. + + Returns a list of directory names. + """ + if not os.path.isdir(module_root): + return [] + + found_modules = [] + + if allowed_modules is not None: + allowed_modules = allowed_modules.split(',') + + for name in os.listdir(module_root): + root = os.path.join(module_root, name) + if name.startswith('.') or not os.path.isdir(root): + continue + + build_py = os.path.join(root, 'build.py') + + if allowed_modules is not None and name not in allowed_modules: + print "skipping module: %s" % name + continue + + if os.path.isfile(build_py): + print "adding module: %s" % name + found_modules.append(name) + + return found_modules + def configure_modules(modules, conf): """ Run the configure() function in the build.py python modules for each module in "modules" (as created by discover_modules). diff --git a/buildscripts/resmokelib/utils/globstar.py b/buildscripts/resmokelib/utils/globstar.py index 7a744014c7b..644ebfe3e38 100644 --- a/buildscripts/resmokelib/utils/globstar.py +++ b/buildscripts/resmokelib/utils/globstar.py @@ -89,6 +89,9 @@ def _split_path(pathname): while True: (dirname, basename) = os.path.split(pathname) parts.append(basename) + if pathname == dirname: + parts.append(dirname) + break if not dirname: break pathname = dirname |