summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-06-11 11:22:53 -0400
committer Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-06-20 10:55:52 -0400
commit 01965cf52bce6976637ecb8f4a622aeb05ab256a (patch)
tree a4641b27e879a47adbeaac435efd9ecda80971e9
parent 1178eff1ddf60a7dac3a8cd71e2fdd278aa01b52 (diff)
download mongo-01965cf52bce6976637ecb8f4a622aeb05ab256a.tar.gz
SERVER-18575: clang_format.py
-rwxr-xr-x buildscripts/clang_format.py 592
-rw-r--r-- buildscripts/moduleconfig.py 33
-rw-r--r-- buildscripts/resmokelib/utils/globstar.py 3
3 files changed, 627 insertions, 1 deletions
diff --git a/buildscripts/clang_format.py b/buildscripts/clang_format.py
new file mode 100755
index 00000000000..f0473c61c01
--- /dev/null
+++ b/buildscripts/clang_format.py
@@ -0,0 +1,592 @@
+#! /usr/bin/env python
+"""
+A script that provides:
+1. Ability to grab binaries where possible from LLVM.
+2. Ability to download binaries from MongoDB cache for clang-format.
+3. Validates clang-format is the right version.
+4. Has support for checking which files are to be checked.
+5. Supports validating and updating a set of files to the right coding style.
+"""
+from __future__ import print_function, absolute_import
+
+import Queue
+import difflib
+import itertools
+import os
+import re
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+import urllib
+from distutils import spawn
+from optparse import OptionParser
+from multiprocessing import cpu_count
+
+# Get relative imports to work when the package is not installed on the PYTHONPATH.
+if __name__ == "__main__" and __package__ is None:
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from buildscripts.resmokelib.utils import globstar
+from buildscripts import moduleconfig
+
+
+##############################################################################
+#
+# Constants for clang-format
+#
+#
+
+# Expected version of clang-format; the version check looks for this string
+# in the output of "clang-format --version"
+CLANG_FORMAT_VERSION = "3.6.0"
+
+# Name of clang-format as a binary
+CLANG_FORMAT_PROGNAME = "clang-format"
+
+# URL location of the "cached" copy of clang-format to download
+# for users who do not have clang-format installed.
+# Note that this archive is a gzip tarball (.tar.gz)
+CLANG_FORMAT_HTTP_LINUX_CACHE = "https://s3.amazonaws.com/boxes.10gen.com/build/clang-format-rhel55.tar.gz"
+
+# URL on LLVM's website to download the clang tarball (xz compressed).
+# $version and $llvm_distro are filled in by get_llvm_url()
+CLANG_FORMAT_SOURCE_URL_BASE = string.Template("http://llvm.org/releases/$version/clang+llvm-$version-$llvm_distro.tar.xz")
+
+# Path in the tarball to the clang-format binary.
+# $version and $tar_path are filled in by get_tar_path()
+CLANG_FORMAT_SOURCE_TAR_BASE = string.Template("clang+llvm-$version-$tar_path/bin/" + CLANG_FORMAT_PROGNAME)
+
+# Path to the modules in the mongodb source tree
+# Has to match the string in SConstruct
+MODULE_DIR = "src/mongo/db/modules"
+
+# Copied from python 2.7 version of subprocess.py
+# Exception classes used by this module.
+class CalledProcessError(Exception):
+    """Raised when a process run by check_output() exits with a non-zero status.
+
+    Attributes:
+      returncode -- exit status of the process
+      cmd -- the command that was run
+      output -- captured output of the process, or None
+    """
+    def __init__(self, returncode, cmd, output=None):
+        self.output = output
+        self.cmd = cmd
+        self.returncode = returncode
+
+    def __str__(self):
+        template = "Command '%s' returned non-zero exit status %d with output %s"
+        details = (self.cmd, self.returncode, self.output)
+        return template % details
+
+
+# Copied from python 2.7 version of subprocess.py
+def check_output(*popenargs, **kwargs):
+    r"""Run a command and return everything it wrote to standard output.
+
+    The arguments are passed through to the Popen constructor, so they are
+    the same as for Popen. Example:
+
+    >>> check_output(["ls", "-l", "/dev/null"])
+    'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n'
+
+    The stdout argument may not be supplied because it is used internally;
+    passing it raises ValueError. Standard error can be folded into the
+    result with stderr=STDOUT.
+
+    >>> check_output(["/bin/sh", "-c",
+    ...               "ls -l non_existent_file ; exit 0"],
+    ...              stderr=STDOUT)
+    'ls: non_existent_file: No such file or directory\n'
+
+    A non-zero exit status raises CalledProcessError, which carries the
+    status in its returncode attribute and the captured output in its
+    output attribute.
+    """
+    if 'stdout' in kwargs:
+        raise ValueError('stdout argument not allowed, it will be overridden.')
+
+    child = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    captured = child.communicate()[0]
+    status = child.poll()
+
+    if not status:
+        return captured
+
+    # Report the command the same way Popen received it
+    failed_cmd = kwargs.get("args")
+    if failed_cmd is None:
+        failed_cmd = popenargs[0]
+    raise CalledProcessError(status, failed_cmd, captured)
+
+def callo(args):
+    """Run the program described by args and hand back its captured output
+    """
+    captured = check_output(args)
+    return captured
+
+def get_llvm_url(version, llvm_distro):
+    """Build the llvm.org download url for the given version and distribution
+    """
+    fields = {"version": version, "llvm_distro": llvm_distro}
+    return CLANG_FORMAT_SOURCE_URL_BASE.substitute(fields)
+
+def get_tar_path(version, tar_path):
+    """Build the path of the clang-format binary inside the llvm tarball
+    """
+    fields = {"version": version, "tar_path": tar_path}
+    return CLANG_FORMAT_SOURCE_TAR_BASE.substitute(fields)
+
+def get_clang_format_from_llvm(llvm_distro, tar_path, dest_file):
+    """Download clang-format from llvm.org, unpack the tarball,
+    and put clang-format in the specified place
+
+    llvm_distro -- distribution suffix used in the llvm.org download url
+    tar_path -- platform suffix of the top level directory inside the tarball
+    dest_file -- path the extracted clang-format binary is moved to
+
+    The tarball is unpacked relative to the current working directory.
+    Raises subprocess.CalledProcessError if tar exits with a non-zero status.
+    """
+    # Build URL
+    url = get_llvm_url(CLANG_FORMAT_VERSION, llvm_distro)
+
+    dest_dir = tempfile.gettempdir()
+    temp_tar_file = os.path.join(dest_dir, "temp.tar.xz")
+
+    # Download from LLVM
+    print("Downloading clang-format %s from %s, saving to %s" % (CLANG_FORMAT_VERSION,
+                                                                 url, temp_tar_file))
+    urllib.urlretrieve(url, temp_tar_file)
+
+    # Extract just clang format file.
+    # The LLVM release is xz compressed, so gzip must not be forced with 'z';
+    # tar detects the compression on its own when reading an archive.
+    # check_call stops here on a failed extraction instead of letting it show
+    # up later as a confusing error from shutil.move.
+    subprocess.check_call(['tar', 'xvf', temp_tar_file, '*clang-format*'])
+
+    # Destination Path
+    shutil.move(get_tar_path(CLANG_FORMAT_VERSION, tar_path), dest_file)
+
+def get_clang_format_from_linux_cache(dest_file):
+    """Get clang-format from mongodb's cache
+
+    dest_file -- path the extracted clang-format binary is moved to
+
+    The archive members are unpacked relative to the current working directory.
+    """
+    # Imported here so this function does not depend on the file level imports
+    import tarfile
+
+    # Get URL
+    url = CLANG_FORMAT_HTTP_LINUX_CACHE
+
+    dest_dir = tempfile.gettempdir()
+    # The cached archive is a gzip tarball, name the download accordingly
+    temp_tar_file = os.path.join(dest_dir, "temp.tar.gz")
+
+    # Download the file
+    print("Downloading clang-format %s from %s, saving to %s" % (CLANG_FORMAT_VERSION,
+                                                                 url, temp_tar_file))
+    urllib.urlretrieve(url, temp_tar_file)
+
+    # Extract just clang format file.
+    # tarfile is used instead of the tar program because GNU tar does not
+    # expand a wildcard member name on extraction unless --wildcards is given,
+    # and a failed extraction raises here instead of being ignored.
+    tar_file = tarfile.open(temp_tar_file)
+    try:
+        for name in tar_file.getnames():
+            if CLANG_FORMAT_PROGNAME in name:
+                tar_file.extract(name)
+    finally:
+        tar_file.close()
+
+    # Destination Path
+    shutil.move("llvm/Release/bin/clang-format", dest_file)
+
+
+class ClangFormat(object):
+ """Class encapsulates finding a suitable copy of clang-format,
+ and linting/formatting an individual file
+
+ The constructor looks at an explicit path, the MONGO_CLANG_FORMAT
+ environment variable, the PATH, a Windows install location and finally a
+ copy stored in cache_dir, and leaves the chosen location in self.path.
+ """
+ def __init__(self, path, cache_dir):
+ """Locate clang-format
+
+ path -- path to a clang-format binary to prefer, or None
+ cache_dir -- directory holding a downloaded clang-format; it is created
+ when it does not exist and a download location is needed
+ """
+ clang_format_progname = CLANG_FORMAT_PROGNAME
+
+ # Initialize clang-format configuration information
+ # NOTE(review): platform, llvm_distro and tar_path are only assigned in
+ # the three branches below; confirm no other platform needs them
+ if sys.platform.startswith("linux"):
+ # e.g. "3.6.0/clang+llvm-3.6.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz"
+ self.platform = "linux_x64"
+ self.llvm_distro = "x86_64-linux-gnu-ubuntu"
+ self.tar_path = "x86_64-linux-gnu"
+ elif sys.platform == "win32":
+ self.platform = "windows_x64"
+ self.llvm_distro = "windows_x64"
+ self.tar_path = None
+ clang_format_progname += ".exe"
+ elif sys.platform == "darwin":
+ # e.g. "3.6.0/clang+llvm-3.6.0-x86_64-apple-darwin.tar.xz"
+ self.platform = "darwin_x64"
+ self.llvm_distro = "x86_64-apple-darwin"
+ self.tar_path = "x86_64-apple-darwin"
+
+ self.path = None
+
+ # Find Clang-Format now
+ if path is not None:
+ if os.path.isfile(path):
+ self.path = path
+ else:
+ print("WARNING: Could not find clang-format %s" % (path))
+
+ # Check the environment variable
+ # NOTE(review): when MONGO_CLANG_FORMAT is set its value replaces a path
+ # accepted above; confirm that the environment should win over the argument
+ if "MONGO_CLANG_FORMAT" in os.environ:
+ self.path = os.environ["MONGO_CLANG_FORMAT"]
+
+ # A candidate with the wrong version is dropped so the search continues
+ if self.path and not self._validate_version(warn=True):
+ self.path = None
+
+ # Check the users' PATH environment variable now
+ if self.path is None:
+ self.path = spawn.find_executable(clang_format_progname)
+
+ if self.path and not self._validate_version(warn=True):
+ self.path = None
+
+ # If Windows, try to grab it from Program Files
+ # NOTE(review): the lookup below raises KeyError when ProgramFiles(x86) is
+ # not defined, and this candidate is accepted without a version check here
+ if sys.platform == "win32":
+ win32bin = os.path.join(os.environ["ProgramFiles(x86)"], "LLVM\\bin\\clang-format.exe")
+ if os.path.exists(win32bin):
+ self.path = win32bin
+
+ # Have not found it yet, download it from the web
+ if self.path is None:
+ if not os.path.isdir(cache_dir):
+ os.makedirs(cache_dir)
+
+ self.path = os.path.join(cache_dir, clang_format_progname)
+
+ # A copy already present in the cache directory is reused
+ if not os.path.isfile(self.path):
+ if sys.platform.startswith("linux"):
+ get_clang_format_from_linux_cache(self.path)
+ elif sys.platform == "darwin":
+ get_clang_format_from_llvm(self.llvm_distro, self.tar_path, self.path)
+ else:
+ print("ERROR: clang-format.py does not support downloading clang-format " +
+ " on this platform, please install clang-format " + CLANG_FORMAT_VERSION)
+
+ # Validate we have the correct version
+ # NOTE(review): the result is not used, and with the default warn=False a
+ # version mismatch prints nothing; confirm this final check is sufficient
+ self._validate_version()
+
+ # Serializes the diff output printed by lint()
+ self.print_lock = threading.Lock()
+
+ def _validate_version(self, warn=False):
+ """Validate clang-format is the expected version
+
+ Runs self.path with --version and returns True when the output contains
+ CLANG_FORMAT_VERSION, False otherwise. With warn=True a warning naming
+ the path and the reported version is printed on a mismatch.
+ """
+ cf_version = callo([self.path, "--version"])
+
+ if CLANG_FORMAT_VERSION in cf_version:
+ return True
+
+ if warn:
+ print("WARNING: clang-format found in path, but incorrect version found at " +
+ self.path + " with version: " + cf_version)
+
+ return False
+
+ def lint(self, file_name):
+ """Check the specified file has the correct format
+
+ Returns True when the file content equals what clang-format would
+ produce. Otherwise prints a unified diff and returns False.
+ """
+ with open(file_name, 'r') as original_text:
+ original_file = original_text.read()
+
+ # Get formatted file as clang-format would format the file
+ formatted_file = callo([self.path, "--style=file", file_name])
+
+ if original_file != formatted_file:
+
+ original_lines = original_file.splitlines()
+ formatted_lines = formatted_file.splitlines()
+ result = difflib.unified_diff(original_lines, formatted_lines)
+
+ # Take a lock to ensure diffs do not get mixed
+ with self.print_lock:
+ print("ERROR: Found diff for " + file_name)
+ print("To fix formatting errors, run %s --style=file -i %s" %
+ (self.path, file_name))
+ for line in result:
+ print(line.rstrip())
+
+ return False
+
+ return True
+
+ def format(self, file_name):
+ """Update the format of the specified file
+
+ Runs clang-format in place (-i) on file_name and returns True when it
+ exits with status zero.
+ """
+
+ # Update the file with clang-format
+ return not subprocess.call([self.path, "--style=file", "-i", file_name])
+
+
+def parallel_process(items, func):
+    """Run a set of work items to completion
+
+    items -- iterable of work items, each is passed to func
+    func -- callable taking one item and returning a true value on success
+
+    Returns True only if every processed item succeeded. Processing stops as
+    soon as one call returns a false value or raises; an exception counts as a
+    failure and is still reported by the worker thread that hit it.
+    """
+    try:
+        cpus = cpu_count()
+    except NotImplementedError:
+        cpus = 1
+
+    print("Running across %d cpus" % (cpus))
+
+    task_queue = Queue.Queue()
+
+    # Use a list so that worker function will capture this variable
+    pp_event = threading.Event()
+    pp_result = [True]
+    pp_lock = threading.Lock()
+
+    def worker():
+        """Worker thread to process work items in parallel
+        """
+        while not pp_event.is_set():
+            try:
+                item = task_queue.get_nowait()
+            except Queue.Empty:
+                # if the queue is empty, exit the worker thread
+                pp_event.set()
+                return
+
+            # Assume failure so that an exception from func is recorded as one
+            # instead of leaving the overall result at True
+            succeeded = False
+            try:
+                succeeded = func(item)
+            finally:
+                # Tell the queue we finished with the item
+                task_queue.task_done()
+
+                # Signal we are done on failure; the loop condition then
+                # stops every worker after its current item
+                if not succeeded:
+                    with pp_lock:
+                        pp_result[0] = False
+
+                    pp_event.set()
+
+    # Enqueue all the work we want to process
+    for item in items:
+        task_queue.put(item)
+
+    # Process all the work
+    threads = []
+    for _ in range(cpus):
+        thread = threading.Thread(target=worker)
+
+        thread.daemon = True
+        thread.start()
+        threads.append(thread)
+
+    # Wait for the threads to finish
+    # Loop with a timeout so that we can process Ctrl-C interrupts
+    # Note: On Python 2.6 wait always returns None so we check is_set,
+    # This works because we only set the event once, and never reset it
+    while not pp_event.is_set():
+        pp_event.wait(1)
+
+    for thread in threads:
+        thread.join()
+
+    return pp_result[0]
+
+def get_base_dir():
+    """Return the base directory of the mongo repo.
+    The script is assumed to live in buildscripts/, so the base directory
+    is the parent of the directory that contains this file.
+    """
+    buildscripts_dir = os.path.split(os.path.realpath(__file__))[0]
+    return os.path.dirname(buildscripts_dir)
+
+def get_repos():
+    """Build the list of Repos that clang-format should be run against
+    """
+    base_dir = get_base_dir()
+    modules_root = os.path.join(base_dir, MODULE_DIR)
+
+    # One repository per discovered module
+    # TODO: how do we filter rocks, does it matter?
+    repo_paths = []
+    for module_name in moduleconfig.discover_module_directories(modules_root, None):
+        repo_paths.append(os.path.join(modules_root, module_name))
+
+    # The main repository comes last
+    repo_paths.append(base_dir)
+
+    return [Repo(repo_path) for repo_path in repo_paths]
+
+
+class Repo(object):
+    """Class encapsulates all knowledge about a git repository, and its metadata
+    to run clang-format.
+    """
+    def __init__(self, path):
+        """Query git for the candidate files and the root of the repository at path
+        """
+        self.path = path
+
+        # Get candidate files once; get_candidate_files() serves this list
+        self.candidate_files = self._get_candidate_files()
+
+        self.root = self._get_root()
+
+    def _callgito(self, args):
+        """Call git for this repository and return its output
+        """
+        # These two flags are the equivalent of -C in newer versions of Git
+        # but we use these to support versions back to ~1.8
+        return callo(['git', '--git-dir', os.path.join(self.path, ".git"),
+                      '--work-tree', self.path] + args)
+
+    def _get_local_dir(self, path):
+        """Get a directory path relative to the git root directory
+
+        Relative paths are returned unchanged.
+        """
+        if os.path.isabs(path):
+            return os.path.relpath(path, self.root)
+        return path
+
+    def get_candidates(self, candidates):
+        """Get the set of candidate files to check by doing an intersection
+        between the input list, and the list of candidates in the repository
+
+        candidates -- list of file names, or None/empty to select every
+                      candidate file of the repository
+
+        Returns the full path to the file for clang-format to consume.
+        """
+        # NOTE: Files may have an absolute root (i.e. leading /)
+        repo_files = self.get_candidate_files()
+
+        if candidates:
+            local_names = set(self._get_local_dir(f) for f in candidates)
+            valid_files = list(local_names.intersection(repo_files))
+        else:
+            valid_files = list(repo_files)
+
+        # Get the full file name here
+        return [os.path.normpath(os.path.join(self.root, f)) for f in valid_files]
+
+    def get_root(self):
+        """Get the root directory for this repository
+        """
+        return self.root
+
+    def _get_root(self):
+        """Gets the root directory for this repository from git
+        """
+        gito = self._callgito(['rev-parse', '--show-toplevel'])
+
+        return gito.rstrip()
+
+    def get_candidate_files(self):
+        """Get the list of candidate files gathered when the Repo was created
+        """
+        # Reuse the list from __init__ instead of running git ls-files again
+        return self.candidate_files
+
+    def _get_candidate_files(self):
+        """Query git to get a list of all files in the repo to consider for analysis
+        """
+        gito = self._callgito(["ls-files"])
+
+        files_match = re.compile(r'\.(h|cpp)$')
+
+        # This allows us to pick all the interesting files
+        # in the mongo and mongo-enterprise repos
+        file_list = []
+        for line in gito.splitlines():
+            if "src" not in line or "src/third_party" in line:
+                continue
+
+            name = line.rstrip()
+            if files_match.search(name):
+                file_list.append(name)
+
+        return file_list
+
+
+def expand_file_string(glob_pattern):
+    """Turn a glob pattern into the list of absolute paths it matches
+    """
+    matches = []
+    for found in globstar.iglob(glob_pattern):
+        matches.append(os.path.abspath(found))
+    return matches
+
+def get_files_to_check(files):
+    """Narrow the given list of file patterns down to the files that
+    actually need to be checked."""
+    # Expand every pattern into concrete file names
+    expanded = []
+    for pattern in files:
+        expanded.extend(expand_file_string(pattern))
+
+    # Let each repository pick the files it knows about
+    selected = []
+    for repo in get_repos():
+        selected.extend(repo.get_candidates(expanded))
+
+    return selected
+
+def get_files_to_check_from_patch(patches):
+    """Take a patch file generated by git diff, and scan the patch for a list of files to check.
+
+    patches -- list of paths to patch files
+
+    Returns the full paths of the files named in the patches that the
+    repositories consider candidates, or an empty list when the patches name
+    no files.
+    """
+    # Capture the "a/" path of every "diff --git" header. Any run of
+    # non-whitespace characters is accepted so that paths with upper-case
+    # letters or other punctuation are not silently skipped.
+    check = re.compile(r"^diff --git a\/(\S+) b\/\S+")
+
+    # Get a list of candidate_files
+    candidates = []
+    for patch in patches:
+        with open(patch, "rb") as infile:
+            for line in infile:
+                found = check.match(line)
+                if found:
+                    candidates.append(found.group(1))
+
+    # An empty candidate list makes Repo.get_candidates select every file in
+    # the repository, so stop here when the patches name no files at all
+    if not candidates:
+        return []
+
+    repos = get_repos()
+
+    valid_files = list(itertools.chain.from_iterable([r.get_candidates(candidates) for r in repos]))
+
+    return valid_files
+
+def _get_build_dir():
+    """Return scons' build directory, used as the place to store a downloaded clang-format
+    """
+    base_dir = get_base_dir()
+    return os.path.join(base_dir, "build")
+
+def _lint_files(clang_format, files):
+    """Check a list of files with clang-format, exiting with status 1 on a mismatch
+    """
+    formatter = ClangFormat(clang_format, _get_build_dir())
+    absolute_names = [os.path.abspath(f) for f in files]
+
+    if parallel_process(absolute_names, formatter.lint):
+        return
+
+    print("ERROR: Code Style does not match coding style")
+    sys.exit(1)
+
+def lint_patch(clang_format, infile):
+    """Entry point of the lint-patch command
+    """
+    files = get_files_to_check_from_patch(infile)
+
+    # A patch may only touch files that we do not want to check, which is fine
+    if not files:
+        return
+
+    _lint_files(clang_format, files)
+
+def lint(clang_format, glob):
+    """Entry point of the lint command
+    """
+    _lint_files(clang_format, get_files_to_check(glob))
+    return True
+
+def _format_files(clang_format, files):
+    """Reformat a list of files with clang-format, exiting with status 1 on failure
+    """
+    formatter = ClangFormat(clang_format, _get_build_dir())
+    absolute_names = [os.path.abspath(f) for f in files]
+
+    if parallel_process(absolute_names, formatter.format):
+        return
+
+    print("ERROR: failed to format files")
+    sys.exit(1)
+
+def format_func(clang_format, glob):
+    """Entry point of the format command
+    """
+    _format_files(clang_format, get_files_to_check(glob))
+
+def usage():
+    """Print usage
+    """
+    # The option parser only understands -h/--help, so point users at that
+    print("clang-format.py supports 3 commands [ lint, lint-patch, format ]. Run "
+          "<command> --help for more information")
+
+def main():
+    """Entry point: dispatch the command named by the first argument
+    """
+    if len(sys.argv) <= 1:
+        usage()
+        return
+
+    command = sys.argv[1]
+
+    parser = OptionParser()
+    parser.add_option("-c", "--clang-format", type="string", dest="clang_format")
+
+    # Pick the handler for the command; every handler takes the same arguments
+    if command == "lint":
+        handler = lint
+    elif command == "lint-patch":
+        handler = lint_patch
+    elif command == "format":
+        handler = format_func
+    else:
+        usage()
+        return
+
+    (options, args) = parser.parse_args(args=sys.argv[2:])
+    handler(options.clang_format, args)
+
+# Run the command line entry point only when executed as a script
+if __name__ == "__main__":
+ main()
diff --git a/buildscripts/moduleconfig.py b/buildscripts/moduleconfig.py
index 111b49545e5..7f98667b6b5 100644
--- a/buildscripts/moduleconfig.py
+++ b/buildscripts/moduleconfig.py
@@ -24,7 +24,8 @@ MongoDB module SConscript files can describe libraries, programs and unit tests,
MongoDB SConscript files do.
"""
-__all__ = ('discover_modules', 'configure_modules', 'register_module_test')
+__all__ = ('discover_modules', 'discover_module_directories', 'configure_modules',
+ 'register_module_test')
import imp
import inspect
@@ -69,6 +70,36 @@ def discover_modules(module_root, allowed_modules):
return found_modules
+def discover_module_directories(module_root, allowed_modules):
+    """Look through module_root for subdirectories that look like MongoDB modules.
+
+    allowed_modules is either None or a comma separated string of module
+    names; when given, subdirectories with other names are skipped.
+
+    Returns a list of directory names.
+    """
+    if not os.path.isdir(module_root):
+        return []
+
+    wanted = None if allowed_modules is None else allowed_modules.split(',')
+
+    module_names = []
+    for entry in os.listdir(module_root):
+        entry_path = os.path.join(module_root, entry)
+
+        # Hidden entries and plain files are never modules
+        if entry.startswith('.') or not os.path.isdir(entry_path):
+            continue
+
+        if wanted is not None and entry not in wanted:
+            print("skipping module: %s" % entry)
+            continue
+
+        # A module directory is recognized by its build.py
+        if os.path.isfile(os.path.join(entry_path, 'build.py')):
+            print("adding module: %s" % entry)
+            module_names.append(entry)
+
+    return module_names
+
def configure_modules(modules, conf):
""" Run the configure() function in the build.py python modules for each module in "modules"
(as created by discover_modules).
diff --git a/buildscripts/resmokelib/utils/globstar.py b/buildscripts/resmokelib/utils/globstar.py
index 7a744014c7b..644ebfe3e38 100644
--- a/buildscripts/resmokelib/utils/globstar.py
+++ b/buildscripts/resmokelib/utils/globstar.py
@@ -89,6 +89,9 @@ def _split_path(pathname):
while True:
(dirname, basename) = os.path.split(pathname)
parts.append(basename)
+ if pathname == dirname:
+ parts.append(dirname)
+ break
if not dirname:
break
pathname = dirname