diff options
author | Daniel Moody <dmoody256@gmail.com> | 2023-05-11 19:48:47 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-05-11 22:23:42 +0000 |
commit | 14e3b091373c63edb7ece3b47f35f3dec198fdad (patch) | |
tree | d997311418d8c25e18a5fc266c503127723181d1 /buildscripts/iwyu | |
parent | e850381e039d22d73d6a5fa2e3ca41f76f7dfa1c (diff) | |
download | mongo-14e3b091373c63edb7ece3b47f35f3dec198fdad.tar.gz |
SERVER-71123 implement IWYU tool and add required human changes
Diffstat (limited to 'buildscripts/iwyu')
-rw-r--r-- | buildscripts/iwyu/README.md | 64 | ||||
-rw-r--r-- | buildscripts/iwyu/iwyu_config.yml | 72 | ||||
-rw-r--r-- | buildscripts/iwyu/run_iwyu_analysis.py | 996 | ||||
-rw-r--r-- | buildscripts/iwyu/test/basic/a.h | 1 | ||||
-rw-r--r-- | buildscripts/iwyu/test/basic/b.cpp | 5 | ||||
-rw-r--r-- | buildscripts/iwyu/test/basic/b.h | 1 | ||||
-rw-r--r-- | buildscripts/iwyu/test/basic/expected_results.py | 17 | ||||
-rw-r--r-- | buildscripts/iwyu/test/basic/test_config.yml | 25 | ||||
-rw-r--r-- | buildscripts/iwyu/test/no_include/a.h | 1 | ||||
-rw-r--r-- | buildscripts/iwyu/test/no_include/b.cpp | 5 | ||||
-rw-r--r-- | buildscripts/iwyu/test/no_include/b.h | 1 | ||||
-rw-r--r-- | buildscripts/iwyu/test/no_include/expected_results.py | 18 | ||||
-rw-r--r-- | buildscripts/iwyu/test/no_include/test_config.yml | 27 | ||||
-rw-r--r-- | buildscripts/iwyu/test/run_tests.py | 97 |
14 files changed, 1330 insertions, 0 deletions
diff --git a/buildscripts/iwyu/README.md b/buildscripts/iwyu/README.md new file mode 100644 index 00000000000..2e925d7500a --- /dev/null +++ b/buildscripts/iwyu/README.md @@ -0,0 +1,64 @@ +# IWYU Analysis tool + +This tool will run +[include-what-you-use](https://github.com/include-what-you-use/include-what-you-use) +(IWYU) analysis across the codebase via `compile_commands.json`. + +The `iwyu_config.yml` file consists of the current options and automatic +pragma marking. You can exclude files from the analysis here. + +The tool has two main modes of operation, `fix` and `check` modes. `fix` +mode will attempt to make changes to the source files based off IWYU's +suggestions. The check mode will simply check if there are any suggestions +at all. + +`fix` mode will take a long time to run, as the tool needs to rerun any +source in which an underlying header was changed to ensure things are not +broken, and so therefore ends up recompiling the codebase several times over. + +For more information please refer to the script `--help` option. + +# Example usage: + +First you must generate the `compile_commands.json` file via this command: + +``` +python3 buildscripts/scons.py --build-profile=compiledb compiledb +``` + +Next you can run the analysis: + +``` +python3 buildscripts/iwyu/run_iwyu_analysis.py +``` +The default mode is fix mode, and it will start making changes to the code +if any changes are found. + +# Debugging failures + +Occasionally the IWYU tool will run into problems where it is unable to suggest +valid changes and the changes will cause things to break (not compile). When +it hits a failure it will copy the source and all the headers that were used +at the time of the compilation into a directory where the same command can be +run to reproduce the error. + +You can examine the suggested changes in the source and headers and compare +them to the working source tree. Then you can make corrective changes to allow + IWYU to get past the failure. 
+ +IWYU is not perfect and it makes several mistakes that a human can understand +and fix appropriately. + +# Running the tests + +This tool includes its own end to end testing. The test directory includes +sub directories which contain source and iwyu configs to run the tool against. +The tests will then compare the results to built in expected results and fail +if the tests are not producing the expected results. + +To run the tests use the command: + +``` +cd buildscripts/iwyu/test +python3 run_tests.py +``` diff --git a/buildscripts/iwyu/iwyu_config.yml b/buildscripts/iwyu/iwyu_config.yml new file mode 100644 index 00000000000..2229242b427 --- /dev/null +++ b/buildscripts/iwyu/iwyu_config.yml @@ -0,0 +1,72 @@ +# options passed to IWYU +iwyu_options: + - '--mapping_file=etc/iwyu_mapping.imp' + - '--no_fwd_decls' + - '--prefix_header_includes=add' + - '--transitive_includes_only' + +# options passed to the fix script +fix_options: + - '--blank_lines' + - '--nocomments' + - '--noreorder' + - '--separate_project_includes=mongo' + - '--safe_headers' + - '--only_re=^src/mongo\/.*' + # TODO SERVER-77051 we will eventually turn this on when our codebase is cleaned up without it. 
+ # - '--nosafe_headers' + +# filename regex to swap no_include in place +# do not include the quotes or brackets; quotes are always assumed +# since this is targeting IWYU added headers +no_includes: + # avoid boost craziness + - 'boost/.+/detail/.+' + - 'asio/impl/.+' + - 'boost/.+\.ipp' + # arch specific + - 'boost/predef/hardware/simd/x86.+' + - 'emmintrin\.h' + # we use a third party format which confuses IWYU + - 'format\.h' + # this is a link time symbol overloading thing not meant to be included + - 'libunwind-x86_64\.h' + # abuse of preprocessor + - 'mongo/db/namespace_string_reserved\.def\.h' + +# prefixes (non regex) to skip +skip_files: + - 'src/third_party' + - 'build/' + - 'src/mongo/tools/mongo_tidy_checks' + - 'src/mongo/util/net' # causes linkage issues + # IWYU confused on forward declares + - 'src/mongo/db/exec/near.cpp' + - 'src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp' + - 'src/mongo/transport/asio/asio_transport_layer.cpp' + # causes IWYU to crash: + - 'src/mongo/db/update/update_internal_node.cpp' + - 'src/mongo/db/update/update_array_node.cpp' + - 'src/mongo/db/update/update_object_node.cpp' + - 'src/mongo/db/update/update_array_node_test.cpp' + - 'src/mongo/db/update/update_object_node_test.cpp' + - 'src/mongo/util/options_parser/environment.cpp' + - 'src/mongo/util/options_parser/option_section.cpp' + +# regex file paths to add keep pragma +# include the quotes or angle brackets in the pattern +keep_includes: + - '<fmt/printf\.h>' + - '<fmt/ranges\.h>' + - '<fmt/chrono\.h>' + - '<asio\.hpp>' + - '<boost/utility/in_place_factory\.hpp>' + - '<libunwind.h>' + - '<fstream>' # IWYU messes up template instantiation + - '"mongo/rpc/object_check\.h"' + - '"mongo/base/init\.h"' + - '"mongo/scripting/mozjs/wrapconstrainedmethod\.h"' + - '"mongo/dbtests/dbtests\.h"' # this is due to using statements in the header + - '"mongo/config\.h"' + - '"mongo/util/overloaded_visitor\.h"' + - '"mongo/db/query/optimizer/node\.h"' diff --git 
a/buildscripts/iwyu/run_iwyu_analysis.py b/buildscripts/iwyu/run_iwyu_analysis.py new file mode 100644 index 00000000000..85606056cc1 --- /dev/null +++ b/buildscripts/iwyu/run_iwyu_analysis.py @@ -0,0 +1,996 @@ +#!/usr/bin/env python3 +""" +TOOL FUNCTIONAL DESCRIPTION. + +Currently the tool works by running IWYU on a subset of compile_commands.json +(the ones we care about like checked in mongo source) and testing each change +in a copy of the original source/header tree so that other compiles are not +affected until it passes a normal compile itself. Due to header dependencies +we must recompile the source files to catch issue IWYU may have introduced +with some dependent header change. Header dependencies do not form a DAG so +we can not process sources in a deterministic fashion. The tool will loop +through all the compilations until all dependents in a compilation are +determined unchanged from the last time the compilation was performed. + +The general workflow used here is to run the tool till there no changes +(several hours on rhel-xxlarge) and fix the errors either in the tool config +or as a manual human change in the code. + +TOOL TECHNICAL DESCRIPTION: + +Regarding the code layout, the main function setups a thread pool executor +and processes each source from the compile_commands. From there it runs a +thread function and within that 5 parts (each there own function) for +each source file: + +1. Skip if deps are unchanged +2. Get the headers deps via -MMD +3. Run IWYU +4. Apply Fixes +5. test compile, record new header deps if passed + +The tool uses mtime and MD5 hashing to know if any header dep has changed. 
+ +""" + +import argparse +import json +import subprocess +import tempfile +import shlex +import os +import re +import concurrent.futures +import hashlib +import atexit +import traceback +import threading +import shutil +import signal +import sys +import yaml +import enum +from dataclasses import dataclass, asdict +from typing import Dict, List, Any, Optional, Callable, Union, Tuple + +from tqdm import tqdm +from colorama import init as colorama_init +from colorama import Fore + +colorama_init() + +parser = argparse.ArgumentParser(description='Run include what you use and test output') + +parser.add_argument('--compile-commands', metavar='FILE', type=str, default='compile_commands.json', + help='Path to the compile commands file to use.') +parser.add_argument( + '--check', action='store_true', help= + 'Enables check mode, which does not apply fixes and only runs to see if any files produce IWYU changes. Exit 0 if no new changes detected.' +) +parser.add_argument( + '--config-file', metavar='FILE', type=str, default="", help= + 'Enables check mode, which does not apply fixes and only runs to see if any files produce IWYU changes. Exit 0 if no new changes detected.' +) +parser.add_argument( + '--iwyu-data', metavar='FILE', type=str, default='iwyu.dat', + help='Location of data used by IWYU, contains hash and status info about all files.') +parser.add_argument( + '--keep-going', action='store_true', help= + 'Do not stop on errors, instead resubmit the job to try again later (after things may have been fixed elsewhere)' +) +parser.add_argument( + '--cycle-debugging', action='store_true', help= + 'Once a cycle has been detected, each directory tree for each step in the cycle will be saved to a .cycle directory.' 
+) +parser.add_argument('--verbose', action='store_true', + help='Prints more info about what is taking place.') +parser.add_argument('--mongo-toolchain-bin-dir', type=str, + help='Which toolchain bin directory to use for this analysis.', + default='/opt/mongodbtoolchain/v4/bin') +parser.add_argument( + '--start-ratio', type=float, help= + 'decimal value between 0 and 1 which indicates what starting ratio index of the total compile commands to run over, can not be greater than the --end-ratio.', + default=0.0) +parser.add_argument( + '--end-ratio', type=float, help= + 'decimal value between 0 and 1 which indicates what ending ratio index of the total compile commands to run over, can not be less than the --start-ratio.', + default=1.0) +command_line_args = parser.parse_args() + +# the current state of all files, contain the cmd_entry, hashes, successes +IWYU_ANALYSIS_STATE: Dict[str, Any] = {} + +# the current state cycles being tracked +IWYU_CYCLE_STATE: Dict[str, Any] = {} + +hash_lookup_locks: Dict[str, threading.Lock] = {} +mtime_hash_lookup: Dict[str, Dict[str, Any]] = {} + +if command_line_args.config_file: + config_file = command_line_args.config_file +else: + config_file = os.path.join(os.path.dirname(__file__), "iwyu_config.yml") + +with open(config_file, "r") as stream: + config = yaml.safe_load(stream) + for key, value in config.items(): + if value is None: + config[key] = [] + +IWYU_OPTIONS = config.get('iwyu_options', []) +IWYU_FIX_OPTIONS = config.get('fix_options', []) +NO_INCLUDES = config.get('no_includes', []) +KEEP_INCLUDES = config.get('keep_includes', []) +SKIP_FILES = tuple(config.get('skip_files', [])) +CYCLE_FILES: List[str] = [] + + +@dataclass +class CompileCommand: + """An entry from compile_commands.json.""" + + file: str + command: str + directory: str + output: str + + +class ResultType(enum.Enum): + """ + Descriptions of enums. 
+ + ERROR: unexpected or unrecognized error cases + FAILED: the IWYU task for a given compile command entry failed + NO_CHANGE: the input header tree and source file have not changed since last time + NOT_RUNNING: sources which we intentionally skip running IWYU all together + RESUBMIT: the IWYU task failed, but it may work later after other header changes + SUCCESS: the IWYU task for a source file has succeeded + """ + + ERROR = enum.auto() + FAILED = enum.auto() + NO_CHANGE = enum.auto() + NOT_RUNNING = enum.auto() + RESUBMIT = enum.auto() + SUCCESS = enum.auto() + + +TOOLCHAIN_DIR = command_line_args.mongo_toolchain_bin_dir +SHUTDOWN_FLAG = False +CLANG_INCLUDES = None +IWYU_OPTIONS = [val for pair in zip(['-Xiwyu'] * len(IWYU_OPTIONS), IWYU_OPTIONS) for val in pair] +if NO_INCLUDES: + NO_INCLUDE_REGEX = re.compile(r'^\s*#include\s+\"(' + '|'.join(NO_INCLUDES) + ')\"') +if KEEP_INCLUDES: + KEEP_INCLUDE_REGEX = re.compile(r'^\s*#include\s+(' + '|'.join(KEEP_INCLUDES) + ')') +CHANGED_FILES_REGEX = re.compile(r"^The\sfull\sinclude-list\sfor\s(.+):$", re.MULTILINE) + + +def printer(message: str) -> None: + """ + Prints output as appropriate. + + We don't print output if we are shutting down because the logs will + explode and original error will be hard to locate. + """ + + if not SHUTDOWN_FLAG or command_line_args.verbose: + tqdm.write(str(message)) + + +def debug_printer(message: str) -> None: + """Print each step in the processing of IWYU.""" + + if command_line_args.verbose: + tqdm.write(str(message)) + + +def failed_return() -> ResultType: + """A common method to allow the processing to continue even after some file fails.""" + + if command_line_args.keep_going: + return ResultType.RESUBMIT + else: + return ResultType.FAILED + + +def in_project_root(file: str) -> bool: + """ + Return true if the file is in the project root. 
+ + This is assuming the project root is the same location + as the compile_commands.json file (the format of compile_commands.json + expects this as well). + """ + + return os.path.abspath(file).startswith( + os.path.abspath(os.path.dirname(command_line_args.compile_commands))) + + +def copy_error_state(cmd_entry: CompileCommand, test_dir: str, + dir_ext: str = '.iwyu_test_dir') -> Optional[str]: + """ + When we fail, we want to copy the current state of the temp dir. + + This is so that the command that was used can be replicated and rerun, + primarily for debugging purposes. + """ + + # we never use a test_dir in check mode, since no files are copied in that mode. + if command_line_args.check: + return None + + # make a directory in the output location that we can store the state of the the + # header dep and source file the compile command was run with, delete old results + base, _ = os.path.splitext(cmd_entry.output) + if os.path.exists(base + dir_ext): + shutil.rmtree(base + dir_ext) + os.makedirs(base + dir_ext, exist_ok=True) + basedir = os.path.basename(test_dir) + error_state_dir = os.path.join(base + dir_ext, basedir) + shutil.copytree(test_dir, error_state_dir) + return error_state_dir + + +def calc_hash_of_file(file: str) -> str: + """ + Calculate the hash of a file. Use mtime as well. + + If the mtime is unchanged, don't do IO, just look up the last hash. + """ + + # we need to lock on specific file io because GIL does not cover system io, so two threads + # could be doing io on the same file at the same time. 
+ if file not in hash_lookup_locks: + hash_lookup_locks[file] = threading.Lock() + with hash_lookup_locks[file]: + if file in mtime_hash_lookup and os.path.getmtime(file) == mtime_hash_lookup[file]['mtime']: + return mtime_hash_lookup[file]['hash'] + else: + hash_val = hashlib.md5(open(file, 'rb').read()).hexdigest() + mtime_hash_lookup[file] = {'mtime': os.path.getmtime(file), 'hash': hash_val} + return hash_val + + +def find_no_include(line: str, lines: List[str], output_lines: List[str]) -> bool: + """ + We need to regex the line to see if it includes an include that matches our NO_INCLUDE_REGEX. + + If so then we do not include that line + when we rewrite the file, and instead we add a IWYU no_include pragma inplace + """ + + no_include_header_found = False + no_include_header = re.findall(NO_INCLUDE_REGEX, line) + + if no_include_header: + no_include_header_found = True + no_include_line = f'// IWYU pragma: no_include "{no_include_header[0]}"\n' + if no_include_line not in lines: + output_lines.append(no_include_line) + return no_include_header_found + + +def add_pragmas(source_files: List[str]): + """ + We automate some of the pragmas so there is not so much manual work. + + There are general cases for some of the pragmas. In this case we open the target + source/header, search via regexes for specific includes we care about, then add + the pragma comments as necessary. + """ + + for source_file in source_files: + + # before we run IWYU, we take a guess at the likely header by swapping .cpp for .h + # so it may not be a real header. 
After IWYU runs we know exactly where to add the pragmas + # in case we got it wrong the first time around + if not os.path.exists(source_file): + continue + + # we load in the file content operate on it, and then write it back out + output_lines: List[str] = [] + with open(source_file, 'r') as fin: + file_lines = fin.readlines() + for line in file_lines: + + if NO_INCLUDES and find_no_include(line, file_lines, output_lines): + continue + + if KEEP_INCLUDES and re.search(KEEP_INCLUDE_REGEX, + line) and '// IWYU pragma: keep' not in line: + + output_lines.append(line.strip() + " // IWYU pragma: keep\n") + continue + + output_lines.append(line) + + with open(source_file, 'w') as fout: + for line in output_lines: + fout.write(line) + + +def recalc_hashes(deps: List[str], change_dir: Optional[str] = None) -> Dict[str, Any]: + """ + We calculate the hashes from the header dep list generated by the compiler. + + We also create cumulative hash for convenance. + + Some cases we are operating a test directory, but deps are referenced as if they are + in the project root. The change_dir option here allows us to calc the the hashes from + the test directory we may be working in, but still record the deps files in a compat + fashion with other processes that work out of project root, e.g. testing if there was a + change from last time. + """ + + hashes: Dict[str, Any] = {'deps': {}} + full_hash = hashlib.new('md5') + for dep in sorted(list(deps)): + if not in_project_root(dep): + continue + if change_dir: + orig_dep = dep + dep = os.path.join(change_dir, dep) + dep_hash = calc_hash_of_file(dep) + if change_dir: + dep = orig_dep + full_hash.update(dep_hash.encode('utf-8')) + hashes['deps'][dep] = dep_hash + hashes['full_hash'] = full_hash.hexdigest() + return hashes + + +def setup_test_dir(cmd_entry: CompileCommand, test_dir: str) -> List[str]: + """ + Here we are copying the source and required header tree from the main source tree. 
+ + Returns the associate source and header that were copied into the test dir. + + We want an isolated location to perform analysis and apply changes so everything is not + clashing. At this point we don't know for sure what header IWYU is going to associate with the source + but for mongo codebase, 99.9% of the time its just swap the .cpp for .h. We need this to apply + some pragma to keep IWYU from removing headers it doesn't understand (cross platform or + third party like boost or asio). The pragmas are harmless in and of themselves so adding the + mistakenly in the 0.1% of the time is negligible. + """ + + original_sources = [ + orig_source for orig_source in [cmd_entry.file, + os.path.splitext(cmd_entry.file)[0] + '.h'] + if os.path.exists(orig_source) + ] + test_source_files = [os.path.join(test_dir, source_file) for source_file in original_sources] + dep_headers = [dep for dep in IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys()] + + # copy each required header from our source tree into our test dir + # this does cost some time, but the alternative (everything operating in the real source tree) + # was much longer due to constant failures. + for source_file in dep_headers + ['etc/iwyu_mapping.imp']: + if in_project_root(source_file): + os.makedirs(os.path.join(test_dir, os.path.dirname(source_file)), exist_ok=True) + shutil.copyfile(source_file, os.path.join(test_dir, source_file)) + + # need to create dirs for outputs + for output in shlex.split(cmd_entry.output): + os.makedirs(os.path.join(test_dir, os.path.dirname(output)), exist_ok=True) + + return test_source_files + + +def get_clang_includes() -> List[str]: + """ + IWYU needs some extra help to know what default includes clang is going to bring in when it normally compiles. + + The query reliably gets the include dirs that would be used in normal compiles. We cache and reuse the result + so the subprocess only runs once. 
+ """ + global CLANG_INCLUDES # pylint: disable=global-statement + if CLANG_INCLUDES is None: + clang_includes = subprocess.getoutput( + f"{TOOLCHAIN_DIR}/clang++ -Wp,-v -x c++ - -fsyntax-only < /dev/null 2>&1 | sed -e '/^#include <...>/,/^End of search/{{ //!b }};d'" + ).split('\n') + clang_includes = ['-I' + include.strip() for include in clang_includes] + CLANG_INCLUDES = clang_includes + return CLANG_INCLUDES + + +def write_cycle_diff(source_file: str, cycle_dir: str, latest_hashes: Dict[str, Any]) -> None: + """ + Write out the diffs between the last iteration and the latest iteration. + + The file contains the hash for before and after for each file involved in the compilation. + """ + + with open(os.path.join(cycle_dir, 'hashes_diff.txt'), 'w') as out: + dep_list = set( + list(IWYU_ANALYSIS_STATE[source_file]['hashes']['deps'].keys()) + + list(latest_hashes['deps'].keys())) + not_found_str = "not found" + (" " * 23) + for dep in sorted(dep_list): + out.write( + f"Original: {IWYU_ANALYSIS_STATE[source_file]['hashes']['deps'].get(dep, not_found_str)}, Latest: {latest_hashes['deps'].get(dep, not_found_str)} - {dep}\n" + ) + + +def check_for_cycles(cmd_entry: CompileCommand, latest_hashes: Dict[str, Any], + test_dir: str) -> Optional[ResultType]: + """ + IWYU can induce cycles so we should check our previous results to see if a cycle has occurred. + + These cycles can happen if a header change induces some other header change which then inturn induces + the original header change. These cycles are generally harmless and are easily broken with a keep + pragma but finding what files are induces the cycle is the challenge. + + With cycle debug mode enabled, the entire header tree is saved for each iteration in the cycle so + all files can be fully examined. 
+ """ + + if cmd_entry.file not in IWYU_CYCLE_STATE: + IWYU_CYCLE_STATE[cmd_entry.file] = { + 'cycles': [], + } + + if latest_hashes['full_hash'] in IWYU_CYCLE_STATE[cmd_entry.file]['cycles']: + if command_line_args.cycle_debugging: + if 'debug_cycles' not in IWYU_CYCLE_STATE[cmd_entry.file]: + IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles'] = {} + + IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles'][ + latest_hashes['full_hash']] = latest_hashes + + cycle_dir = copy_error_state( + cmd_entry, test_dir, dir_ext= + f".{latest_hashes['full_hash']}.cycle{len(IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles'])}" + ) + write_cycle_diff(cmd_entry.file, cycle_dir, latest_hashes) + if latest_hashes['full_hash'] not in IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles']: + printer(f"{Fore.YELLOW}[5] - Cycle Found!: {cmd_entry.file}{Fore.RESET}") + else: + printer(f"{Fore.RED}[5] - Cycle Done! : {cmd_entry.file}{Fore.RESET}") + return failed_return() + else: + printer(f"{Fore.RED}[5] - Cycle Found!: {cmd_entry.file}{Fore.RESET}") + CYCLE_FILES.append(cmd_entry.file) + return ResultType.SUCCESS + else: + IWYU_CYCLE_STATE[cmd_entry.file]['cycles'].append(latest_hashes['full_hash']) + + return None + + +def write_iwyu_data() -> None: + """Store the data we have acquired during this run so we can resume at the same spot on subsequent runs.""" + + # There might be faster ways to store this like serialization or + # what not, but having human readable json is good for debugging. + # on a full build this takes around 10 seconds to write out. + if IWYU_ANALYSIS_STATE: + try: + # atomic move operation prevents ctrl+c mashing from + # destroying everything, at least we can keep the original + # data safe from emotional outbursts. 
+ with tempfile.NamedTemporaryFile() as temp: + with open(temp.name, 'w') as iwyu_data_file: + json.dump(IWYU_ANALYSIS_STATE, iwyu_data_file, sort_keys=True, indent=4) + shutil.move(temp.name, command_line_args.iwyu_data) + except FileNotFoundError as exc: + if temp.name in str(exc): + pass + + +def need_to_process(cmd_entry: CompileCommand, + custom_printer: Callable[[str], None] = printer) -> Optional[ResultType]: + """ + The first step in the first step for processing a given source file. + + We have a list of skip prefixes, for example build or third_party, but others can be added. + + If it is a file we are not skipping, then we check if we have already done the work by calculating the + hashes and seeing if what we recorded last time has changed. + """ + + if cmd_entry.file.startswith( + SKIP_FILES) or cmd_entry.file in CYCLE_FILES or '/conftest_' in cmd_entry.file: + custom_printer(f"{Fore.YELLOW}[5] - Not running!: {cmd_entry.file}{Fore.RESET}") + return ResultType.NOT_RUNNING + + if IWYU_ANALYSIS_STATE.get(cmd_entry.file): + hashes = recalc_hashes(IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys()) + + # we only skip if the matching mode was successful last time, otherwise we assume we need to rerun + mode_success = 'CHECK' if command_line_args.check else 'FIX' + if command_line_args.verbose: + diff_files = list( + set(hashes['deps'].keys()).symmetric_difference( + set(IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys()))) + if diff_files: + msg = f"[1] Need to process {cmd_entry.file} because different files:\n" + for file in diff_files: + msg += f'{file}\n' + debug_printer(msg) + for file in IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys(): + if file in hashes['deps'] and hashes['deps'][file] != IWYU_ANALYSIS_STATE[ + cmd_entry.file]['hashes']['deps'][file]: + debug_printer( + f"[1] Need to process {cmd_entry.file} because hash changed:\n{file}: {hashes['deps'][file]}\n{file}: 
{IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'][file]}" + ) + + if hashes['full_hash'] == IWYU_ANALYSIS_STATE[ + cmd_entry.file]['hashes']['full_hash'] and mode_success in IWYU_ANALYSIS_STATE[ + cmd_entry.file].get('success', []): + custom_printer(f"{Fore.YELLOW}[5] - No Change! : {cmd_entry.file}{Fore.RESET}") + return ResultType.NO_CHANGE + + return None + + +def calc_dep_headers(cmd_entry: CompileCommand) -> Optional[ResultType]: + """ + The second step in the IWYU process. + + We need to get a list of headers which are dependencies so we can copy them to an isolated + working directory (so parallel IWYU changes don't break us). We will switch on preprocessor + for faster generation of the dep file. + + Once we have the deps list, we parse it and calc the hashes of the deps. + """ + + try: + with tempfile.NamedTemporaryFile() as depfile: + + # first time we could be executing a real command so we make sure the dir + # so the compiler is not mad + outputs = shlex.split(cmd_entry.output) + for output in outputs: + out_dir = os.path.dirname(output) + if out_dir: + os.makedirs(out_dir, exist_ok=True) + + # setup up command for fast depfile generation + cmd = cmd_entry.command + cmd += f' -MD -MF {depfile.name}' + cmd = cmd.replace(' -c ', ' -E ') + debug_printer(f"[1] - Getting Deps: {cmd_entry.file}") + + try: + deps_proc = subprocess.run(cmd, shell=True, capture_output=True, text=True, + timeout=300) + except subprocess.TimeoutExpired: + deps_proc = None + pass + + # if successful, record the latest deps with there hashes, otherwise try again later + if deps_proc is None or deps_proc.returncode != 0: + printer(f"{Fore.RED}[5] - Deps Failed!: {cmd_entry.file}{Fore.RESET}") + printer(deps_proc.stderr) + return ResultType.RESUBMIT + else: + with open(depfile.name) as deps: + deps_str = deps.read() + deps_str = deps_str.replace('\\\n', '').strip() + + hashes = recalc_hashes(shlex.split(deps_str)[1:]) + if not IWYU_ANALYSIS_STATE.get(cmd_entry.file): + 
IWYU_ANALYSIS_STATE[cmd_entry.file] = asdict(cmd_entry) + IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes'] = hashes + IWYU_ANALYSIS_STATE[cmd_entry.file]['success'] = [] + + # if the dep command failed the context will through an execption, we will ignore just + # that case + except FileNotFoundError as exc: + traceback.print_exc() + if depfile.name in str(exc): + pass + + return None + + +def execute_iwyu(cmd_entry: CompileCommand, test_dir: str) -> Union[ResultType, bytes]: + """ + The third step of IWYU analysis. Check mode will stop here. + + Here we want to execute IWYU on our source. Note at this point in fix mode + we will be working out of an isolated test directory which has the + required header tree copied over. Check mode will just pass in the original + project root as the test_dir (the real source tree). + """ + + # assert we are working with a pure clang++ build + if not cmd_entry.command.startswith(f'{TOOLCHAIN_DIR}/clang++'): + printer("unexpected compiler:") + printer(cmd_entry.command) + return ResultType.FAILED + + # swap out for our tool and add in extra options for IWYU + cmd = f'{TOOLCHAIN_DIR}/include-what-you-use' + cmd_entry.command[len(f'{TOOLCHAIN_DIR}/clang++' + ):] + cmd += ' ' + ' '.join(get_clang_includes()) + cmd += ' ' + ' '.join(IWYU_OPTIONS) + + # mimic the PATH we normally use in our build + env = os.environ.copy() + env['PATH'] += f':{TOOLCHAIN_DIR}' + + debug_printer(f'[2] - Running IWYU: {cmd_entry.file}') + proc = subprocess.run(cmd, shell=True, env=env, capture_output=True, cwd=test_dir) + + # IWYU has some bugs about forward declares I am assuming, because in some cases even though + # we have passed --no_fwd_decls it still sometimes recommend forward declares and sometimes they + # are wrong and cause compilation errors. 
def apply_fixes(cmd_entry: CompileCommand, iwyu_output: bytes,
                test_dir: str) -> Optional[ResultType]:
    """
    Step 4 in the IWYU process.

    Run the toolchain's fix_includes.py script, feeding it the raw suggestions
    produced by the IWYU binary so it can rewrite the sources in the isolated
    test directory.

    Returns None on success, or ResultType.RESUBMIT when the fixer timed out
    so the caller can retry this file later.
    """
    cmd = [f'{sys.executable}', f'{TOOLCHAIN_DIR}/fix_includes.py'] + IWYU_FIX_OPTIONS

    debug_printer(f'[3] - Apply fixes : {cmd_entry.file}')
    try:
        subprocess.run(cmd, capture_output=True, input=iwyu_output, timeout=180, cwd=test_dir)
    except subprocess.TimeoutExpired:
        printer(f"{Fore.RED}[5] - Apply failed: {cmd_entry.file}{Fore.RESET}")
        return ResultType.RESUBMIT

    return None


def test_compile(cmd_entry: CompileCommand, test_dir: str) -> Optional[ResultType]:
    """
    Step 5 in the IWYU analysis and the last step for fix mode.

    Re-run the original compile command inside the test directory and make
    sure it succeeds before the changed files are copied back into the real
    source tree for inclusion into other jobs.

    Returns ResultType.SUCCESS on a clean compile, a failure ResultType when
    the compile broke, or None when the compile could not be attempted.
    """
    try:
        with tempfile.NamedTemporaryFile() as depfile:
            debug_printer(f"[4] - Test compile: {cmd_entry.file}")

            # we want to capture the header deps again because IWYU may have changed them
            cmd = cmd_entry.command
            cmd += f' -MMD -MF {depfile.name}'
            try:
                p3 = subprocess.run(cmd, shell=True, capture_output=True, text=True,
                                    timeout=300, cwd=test_dir)
            except (subprocess.TimeoutExpired, MemoryError):
                # treated the same as "could not compile"; fall through to the
                # depfile read which will raise FileNotFoundError and be ignored
                p3 = None

            # our test compile has failed so we need to report and set up for debug
            if p3 is not None and p3.returncode != 0:
                printer(f"{Fore.RED}[5] - IWYU Failed!: {cmd_entry.file}{Fore.RESET}")
                printer(f"{cmd}")
                printer(f"{p3.stderr}")
                copy_error_state(cmd_entry, test_dir)
                return failed_return()

            else:
                with open(depfile.name) as deps:
                    # calculate the hashes of the deps used to create
                    # this successful compile.
                    deps_str = deps.read()
                    deps_str = deps_str.replace('\\\n', '').strip()
                    hashes = recalc_hashes(shlex.split(deps_str)[1:], change_dir=test_dir)

                if result := check_for_cycles(cmd_entry, hashes, test_dir):
                    return result

                IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes'] = hashes
                if 'FIX' not in IWYU_ANALYSIS_STATE[cmd_entry.file]['success']:
                    IWYU_ANALYSIS_STATE[cmd_entry.file]['success'].append('FIX')
                printer(f"{Fore.GREEN}[5] - IWYU Success: {cmd_entry.file}{Fore.RESET}")
                return ResultType.SUCCESS

    # if the compile failed or timed out the depfile may never have been
    # written; ignore only that specific missing file. The original swallowed
    # every FileNotFoundError here - re-raise unrelated ones so real bugs
    # are not hidden.
    except FileNotFoundError as exc:
        if depfile.name not in str(exc):
            raise

    return None
def intialize_deps(cmd_entry: CompileCommand) -> Tuple[ResultType, CompileCommand]:
    """
    When running in fix mode, take some time to initialize the header deps.

    This mainly improves the overall time to complete a full analysis: source
    files are processed in order from fewest dependencies to most. The
    rationale is that dependency-heavy files should go last so that any
    changes in those dependencies are automatically accounted for, lessening
    the chance of rework. It also keeps the progress bar accurate by not
    counting skipped files.
    """

    # step 1
    if result := need_to_process(cmd_entry, custom_printer=debug_printer):
        return result, cmd_entry

    # Deps recorded by a previous run are a good enough indicator of how
    # dependency heavy this file is - prefer them over invoking the compiler.
    try:
        if IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps']:
            return ResultType.SUCCESS, cmd_entry
    except KeyError:
        pass

    if result := calc_dep_headers(cmd_entry):
        return result, cmd_entry

    return ResultType.SUCCESS, cmd_entry


def check_iwyu(cmd_entry: CompileCommand) -> ResultType:
    """
    One of the two thread functions the main thread pool executor will call.

    Executes through step 3 (see the steps in the top comment) and reports
    success when IWYU says no changes are required.
    """

    # step 1
    if result := need_to_process(cmd_entry):
        return result

    # step 2
    if result := calc_dep_headers(cmd_entry):
        return result

    # step 3 - check mode never modifies anything, so run in the source tree
    suggestions = execute_iwyu(cmd_entry, '.')
    if isinstance(suggestions, ResultType):
        return suggestions

    # success!
    printer(f"{Fore.GREEN}[2] - IWYU Success: {cmd_entry.file}{Fore.RESET}")
    success_list = IWYU_ANALYSIS_STATE[cmd_entry.file]['success']
    if "CHECK" not in success_list:
        success_list.append("CHECK")
    return ResultType.SUCCESS


def fix_iwyu(cmd_entry: CompileCommand) -> ResultType:
    """
    One of the two thread functions the main thread pool executor will call.

    Executes through step 5 (see the steps in the top comment) and reports
    success when the original compile command still succeeds after IWYU has
    made its changes.
    """

    # step 1
    if result := need_to_process(cmd_entry):
        return result

    # step 2
    if result := calc_dep_headers(cmd_entry):
        return result

    # the changes are made in an isolated test dir so concurrent jobs
    # can not interfere with each other.
    with tempfile.TemporaryDirectory() as test_dir:
        staged_files = setup_test_dir(cmd_entry, test_dir)

        # a first round of pragmas to make sure IWYU doesn't fail or remove things we dont want
        add_pragmas(staged_files)

        # step 3
        iwyu_out = execute_iwyu(cmd_entry, test_dir)
        if isinstance(iwyu_out, ResultType):
            return iwyu_out

        # now we can extract exactly what files IWYU operated on and copy only those back
        for file in re.findall(CHANGED_FILES_REGEX, iwyu_out.decode('utf-8')):
            if not in_project_root(file):
                continue
            staged = os.path.join(test_dir, file)
            if staged not in staged_files:
                staged_files.append(staged)

        # step 4
        if result := apply_fixes(cmd_entry, iwyu_out, test_dir):
            return result

        # a final round of pragmas for the next time this is run through IWYU
        add_pragmas(staged_files)

        # step 5
        result = test_compile(cmd_entry, test_dir)
        if result == ResultType.SUCCESS:
            for file in staged_files:
                if os.path.exists(file):
                    shutil.move(file, file[len(test_dir) + 1:])

        return result


def run_iwyu(cmd_entry: CompileCommand) -> Tuple[ResultType, CompileCommand]:
    """Delegate one compile command to the mode selected on the command line."""
    mode = check_iwyu if command_line_args.check else fix_iwyu
    return mode(cmd_entry), cmd_entry
def main() -> None:
    """Drive the full IWYU analysis over the compilation database."""
    global IWYU_ANALYSIS_STATE, SHUTDOWN_FLAG  # pylint: disable=global-statement
    atexit.register(write_iwyu_data)

    with concurrent.futures.ThreadPoolExecutor(
            max_workers=len(os.sched_getaffinity(0)) + 4) as executor:

        # on ctrl+c, try to shut down as fast as possible.
        def sigint_handler(the_signal, frame):
            executor.shutdown(wait=False, cancel_futures=True)
            sys.exit(1)

        signal.signal(signal.SIGINT, sigint_handler)

        # load in any data from prior runs
        if os.path.exists(command_line_args.iwyu_data):
            with open(command_line_args.iwyu_data) as iwyu_data_file:
                IWYU_ANALYSIS_STATE = json.load(iwyu_data_file)

        # load in the compile commands
        with open(command_line_args.compile_commands) as compdb_file:
            compiledb = [CompileCommand(**json_data) for json_data in json.load(compdb_file)]

        # assert the generated source code has been generated
        for cmd_entry in compiledb:
            if cmd_entry.file.endswith('_gen.cpp') and not os.path.exists(cmd_entry.file):
                printer(f"{Fore.RED}[5] - Missing Gen!: {cmd_entry.file}{Fore.RESET}")
                printer(
                    f"Error: missing generated file {cmd_entry.file}, make sure generated-sources are generated."
                )
                sys.exit(1)

        # clamp the requested [start_ratio, end_ratio) window into [0, total_cmds]
        total_cmds = len(compiledb)
        start_index = min(max(int(total_cmds * command_line_args.start_ratio), 0), total_cmds)
        end_index = min(max(int(total_cmds * command_line_args.end_ratio), 0), total_cmds)

        if start_index == end_index:
            print(f"Error: start_index and end_index are the same: {start_index}")
            sys.exit(1)
        if start_index > end_index:
            print(
                f"Error: start_index {start_index} can not be greater than end_index {end_index}"
            )
            sys.exit(1)

        print(f"Analyzing compile commands from {start_index} to {end_index}.")
        compiledb = compiledb[start_index:end_index]

        if not command_line_args.check:
            # We can optimize the order we process things by processing source files
            # with the least number of dependencies first. This is a cost up front
            # but will result in huge gains in the amount of re-processing to be done.
            printer("Getting Initial Header Dependencies...")
            cmd_entry_list = []
            try:
                with tqdm(total=len(compiledb), disable=None) as pbar:

                    # create and run the dependency check jobs
                    future_cmd = {
                        executor.submit(intialize_deps, cmd_entry): cmd_entry
                        for cmd_entry in compiledb
                    }
                    for future in concurrent.futures.as_completed(future_cmd):
                        result, cmd_entry = future.result()
                        if result != ResultType.NOT_RUNNING:
                            cmd_entry_list.append(cmd_entry)
                        pbar.update(1)
            except Exception:
                SHUTDOWN_FLAG = True
                traceback.print_exc()
                executor.shutdown(wait=True, cancel_futures=True)
                sys.exit(1)
        else:
            cmd_entry_list = compiledb

        try:
            # this loop will keep looping until a full run produces no new changes.
            changes_left = True
            while changes_left:
                changes_left = False

                with tqdm(total=len(cmd_entry_list), disable=None) as pbar:

                    # create and run the IWYU jobs, least dependencies first
                    def dep_sorted(cmd_entry):
                        try:
                            return len(IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'])
                        except KeyError:
                            return 0

                    future_cmd = {
                        executor.submit(run_iwyu, cmd_entry): cmd_entry
                        for cmd_entry in sorted(cmd_entry_list, key=dep_sorted)
                    }

                    # process the results
                    for future in concurrent.futures.as_completed(future_cmd):
                        result, cmd_entry = future.result()

                        # any result which implies there could be changes required sets the
                        # next loop
                        if result not in (ResultType.NO_CHANGE, ResultType.NOT_RUNNING):
                            changes_left = True

                        # if a file is considered done for this loop, update the status bar
                        if result in (ResultType.SUCCESS, ResultType.NO_CHANGE,
                                      ResultType.NOT_RUNNING):
                            pbar.update(1)
                        # resubmit jobs which may have a better chance to run later.
                        # NOTE(review): this future is not in future_cmd, so its result
                        # is only picked up on the next while-loop pass - confirm that
                        # is intended.
                        elif result == ResultType.RESUBMIT:
                            executor.submit(run_iwyu, cmd_entry)
                        # handle a failure case; the exception quickly drops us out of
                        # this loop. cmd_entry is a CompileCommand, so attribute access
                        # is required here (subscripting raised TypeError and masked
                        # the intended message).
                        else:
                            SHUTDOWN_FLAG = True
                            tqdm.write(
                                f"{result.name}: Shutting down other threads, please be patient."
                            )
                            raise Exception(f'Shutdown due to {result.name} {cmd_entry.file}')

        except Exception:
            SHUTDOWN_FLAG = True
            traceback.print_exc()
            executor.shutdown(wait=True, cancel_futures=True)
            sys.exit(1)
        finally:
            if CYCLE_FILES:
                printer(f"{Fore.YELLOW} Cycles detected:")
                for file in CYCLE_FILES:
                    printer(f'    {file}')


main()
'--transitive_includes_only' + +# options passed to the fix script +fix_options: + - '--blank_lines' + - '--nocomments' + - '--noreorder' + - '--safe_headers' + +# filename regex to swap no_include in place +# quotes and brackets not included quotes are always assumed +# since this is targeting IWYU added headers +no_includes: + +# prefixes (non regex) to skip +skip_files: + +# regex file paths to add keep pragma +# include quotes are angle brackets +keep_includes: diff --git a/buildscripts/iwyu/test/no_include/a.h b/buildscripts/iwyu/test/no_include/a.h new file mode 100644 index 00000000000..ad792ace34b --- /dev/null +++ b/buildscripts/iwyu/test/no_include/a.h @@ -0,0 +1 @@ +#include "b.h" diff --git a/buildscripts/iwyu/test/no_include/b.cpp b/buildscripts/iwyu/test/no_include/b.cpp new file mode 100644 index 00000000000..dcbc8627764 --- /dev/null +++ b/buildscripts/iwyu/test/no_include/b.cpp @@ -0,0 +1,5 @@ +#include "a.h" + +type_b return_b_function() { + return type_b(); +} diff --git a/buildscripts/iwyu/test/no_include/b.h b/buildscripts/iwyu/test/no_include/b.h new file mode 100644 index 00000000000..422d7626e90 --- /dev/null +++ b/buildscripts/iwyu/test/no_include/b.h @@ -0,0 +1 @@ +class type_b {}; diff --git a/buildscripts/iwyu/test/no_include/expected_results.py b/buildscripts/iwyu/test/no_include/expected_results.py new file mode 100644 index 00000000000..90bda7e15a4 --- /dev/null +++ b/buildscripts/iwyu/test/no_include/expected_results.py @@ -0,0 +1,18 @@ +import os +import sys + +EXPECTED_B_CPP = """// IWYU pragma: no_include "b.h" + +#include "a.h" // IWYU pragma: keep + +type_b return_b_function() { + return type_b(); +} +""" + +with open('b.cpp') as f: + content = f.read() + if content != EXPECTED_B_CPP: + print(f'Actual:\n"""{content}"""') + print(f'Expected:\n"""{EXPECTED_B_CPP}"""') + sys.exit(1) diff --git a/buildscripts/iwyu/test/no_include/test_config.yml b/buildscripts/iwyu/test/no_include/test_config.yml new file mode 100644 index 
import pathlib
import yaml
import json
import shutil
import os
import glob
import subprocess
import sys
import argparse
import concurrent.futures

parser = argparse.ArgumentParser(description='Run tests for the IWYU analysis script.')

parser.add_argument('--mongo-toolchain-bin-dir', type=str,
                    help='Which toolchain bin directory to use for this analysis.',
                    default='/opt/mongodbtoolchain/v4/bin')

args = parser.parse_args()

# The tests must run from the directory this script lives in. Note the original
# compared os.getcwd() (a str) against a Path, which is never equal, so the
# warning and chdir fired unconditionally; compare Path against Path instead.
tests_dir = pathlib.Path(__file__).parent.resolve()
if pathlib.Path.cwd() != tests_dir:
    print(f"iwyu test script must run in the tests directory, changing dirs to {tests_dir}")
    os.chdir(tests_dir)

analysis_script = pathlib.Path(__file__).parent.parent / 'run_iwyu_analysis.py'


def run_test(entry):
    """Run the IWYU analysis on one test directory and validate its results.

    Returns (returncode, log_message, test_name); returncode is 0 on pass.
    """
    print(f"Running test {pathlib.Path(entry)}...")
    test_dir = pathlib.Path(entry) / 'test_run'
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)

    shutil.copytree(pathlib.Path(entry), test_dir)

    # synthesize a compile_commands.json covering every .cpp in the test dir
    source_files = glob.glob('**/*.cpp', root_dir=test_dir, recursive=True)
    compile_commands = []

    for source_file in source_files:
        output = os.path.splitext(source_file)[0] + '.o'
        compile_commands.append({
            'file': source_file,
            'command': f"{args.mongo_toolchain_bin_dir}/clang++ -o {output} -c {source_file}",
            "directory": os.path.abspath(test_dir),
            "output": output,
        })

    with open(test_dir / 'compile_commands.json', 'w') as compdb:
        json.dump(compile_commands, compdb)

    os.makedirs(test_dir / 'etc', exist_ok=True)
    with open(test_dir / 'etc' / 'iwyu_mapping.imp', 'w') as mapping:
        mapping.write(
            '[{include: ["\\"placeholder.h\\"", "private", "\\"placeholder2.h\\"", "public"]}]')

    # run the analysis under test, then the test's own expected-results checker
    iwyu_run = subprocess.run(
        [sys.executable, analysis_script, '--verbose', '--config-file=test_config.yml'],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=test_dir)

    results_run = subprocess.run(
        [sys.executable, pathlib.Path(entry) / 'expected_results.py'], stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT, text=True, cwd=test_dir)

    test_name = pathlib.Path(entry).name
    if results_run.returncode != 0:
        msg = '\n'.join([iwyu_run.stdout, results_run.stdout, f"FAILED!: {pathlib.Path(entry)}"])
        msg = '\n'.join([f"[{test_name}] {line}" for line in msg.split('\n')])
        return results_run.returncode, msg, test_name

    return results_run.returncode, f"[{test_name}] PASSED!: {pathlib.Path(entry)}", test_name


failed_tests = []
with concurrent.futures.ThreadPoolExecutor(
        max_workers=len(os.sched_getaffinity(0)) + 4) as executor:

    # create and run the IWYU test jobs, one per test subdirectory
    future_cmd = {
        executor.submit(run_test, entry): entry
        for entry in pathlib.Path(__file__).parent.glob('*') if os.path.isdir(entry)
    }

    # process the results
    for future in concurrent.futures.as_completed(future_cmd):
        result, message, test_name = future.result()
        if result != 0:
            failed_tests += [test_name]
        print(message)

print("\n***Tests complete.***")
if failed_tests:
    print("The following tests failed:")
    for test in failed_tests:
        print(' - ' + test)
    print("Please review the logs above for more information.")
    # propagate the failure to the caller (CI); the original always exited 0
    sys.exit(1)