summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Moody <dmoody256@gmail.com>2023-05-11 19:48:47 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-05-11 22:23:42 +0000
commit14e3b091373c63edb7ece3b47f35f3dec198fdad (patch)
treed997311418d8c25e18a5fc266c503127723181d1
parente850381e039d22d73d6a5fa2e3ca41f76f7dfa1c (diff)
downloadmongo-14e3b091373c63edb7ece3b47f35f3dec198fdad.tar.gz
SERVER-71123 implement IWYU tool and add required human changes
-rw-r--r--.gitignore4
-rw-r--r--buildscripts/iwyu/README.md64
-rw-r--r--buildscripts/iwyu/iwyu_config.yml72
-rw-r--r--buildscripts/iwyu/run_iwyu_analysis.py996
-rw-r--r--buildscripts/iwyu/test/basic/a.h1
-rw-r--r--buildscripts/iwyu/test/basic/b.cpp5
-rw-r--r--buildscripts/iwyu/test/basic/b.h1
-rw-r--r--buildscripts/iwyu/test/basic/expected_results.py17
-rw-r--r--buildscripts/iwyu/test/basic/test_config.yml25
-rw-r--r--buildscripts/iwyu/test/no_include/a.h1
-rw-r--r--buildscripts/iwyu/test/no_include/b.cpp5
-rw-r--r--buildscripts/iwyu/test/no_include/b.h1
-rw-r--r--buildscripts/iwyu/test/no_include/expected_results.py18
-rw-r--r--buildscripts/iwyu/test/no_include/test_config.yml27
-rw-r--r--buildscripts/iwyu/test/run_tests.py97
-rw-r--r--etc/evergreen_yml_components/definitions.yml15
-rw-r--r--etc/evergreen_yml_components/variants/compile_static_analysis.yml1
-rw-r--r--etc/iwyu_mapping.imp16
-rw-r--r--etc/pip/components/lint.req2
-rw-r--r--site_scons/site_tools/compilation_db.py2
-rw-r--r--src/mongo/bson/mutable/mutable_bson_test_utils.h2
-rw-r--r--src/mongo/client/server_discovery_monitor.h3
-rw-r--r--src/mongo/db/commands/apply_ops_cmd.cpp2
-rw-r--r--src/mongo/db/database_name.h6
-rw-r--r--src/mongo/db/exec/js_function.h2
-rw-r--r--src/mongo/db/fts/unicode/byte_vector.h8
-rw-r--r--src/mongo/db/namespace_string.h6
-rw-r--r--src/mongo/db/operation_context_test.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_facet.h1
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.cpp47
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.h47
-rw-r--r--src/mongo/db/pipeline/document_source_unwind_test.cpp4
-rw-r--r--src/mongo/db/pipeline/inner_pipeline_stage_impl.h1
-rw-r--r--src/mongo/db/pipeline/partition_key_comparator.h1
-rw-r--r--src/mongo/db/query/query_planner.cpp5
-rw-r--r--src/mongo/db/repl/apply_ops_command_info.h2
-rw-r--r--src/mongo/db/repl/storage_timestamp_test.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication.h4
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication_test.cpp3
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service_test.cpp19
-rw-r--r--src/mongo/db/sorter/sorter_test.cpp2
-rw-r--r--src/mongo/db/traffic_reader_main.cpp2
-rw-r--r--src/mongo/db/update/bit_node_test.cpp2
-rw-r--r--src/mongo/db/update/update_node_test_fixture.h1
-rw-r--r--src/mongo/executor/pinned_connection_task_executor.cpp2
-rw-r--r--src/mongo/idl/idl_test.h2
-rw-r--r--src/mongo/idl/server_parameter_test_util.h1
-rw-r--r--src/mongo/logv2/log_options.h1
-rw-r--r--src/mongo/logv2/uassert_sink.h1
-rw-r--r--src/mongo/platform/process_id.cpp2
-rw-r--r--src/mongo/platform/visibility_test_lib1.h1
-rw-r--r--src/mongo/scripting/dbdirectclient_factory.h3
-rw-r--r--src/mongo/unittest/assert.h1
-rw-r--r--src/mongo/unittest/inline_auto_update.h2
-rw-r--r--src/mongo/util/assert_util.h2
-rw-r--r--src/mongo/util/log_with_sampling.h1
-rw-r--r--src/mongo/util/quick_exit.cpp2
-rw-r--r--src/mongo/util/represent_as.h1
-rw-r--r--src/mongo/util/text.cpp2
-rw-r--r--src/mongo/watchdog/watchdog.cpp2
60 files changed, 1478 insertions, 94 deletions
diff --git a/.gitignore b/.gitignore
index ce851fdf7b2..1a02da5bdda 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,8 @@
venv
*~
+*.test_iwyu.h
+*.test_iwyu.cpp
*.swp
*.o
*.os
@@ -44,6 +46,7 @@ venv
*.eslintcache
*#
.#*
+iwyu.dat
/src/mongo/*/*Debug*/
/src/mongo/*/*/*Debug*/
@@ -60,6 +63,7 @@ venv
/src/third_party/*/*.lastbuildstate
/buildscripts/libdeps/graph_visualizer_web_stack/build
/buildscripts/libdeps/graph_visualizer_web_stack/node_modules
+buildscripts/iwyu/test/*/test_run
libdeps.graphml
build-metrics.json
config.log
diff --git a/buildscripts/iwyu/README.md b/buildscripts/iwyu/README.md
new file mode 100644
index 00000000000..2e925d7500a
--- /dev/null
+++ b/buildscripts/iwyu/README.md
@@ -0,0 +1,64 @@
+# IWYU Analysis tool
+
+This tool will run
+[include-what-you-use](https://github.com/include-what-you-use/include-what-you-use)
+(IWYU) analysis across the codebase via `compile_commands.json`.
+
+The `iwyu_config.yml` file consists of the current options and automatic
+pragma marking. You can exclude files from the analysis here.
+
+The tool has two main modes of operation: `fix` and `check`. `fix`
+mode will attempt to make changes to the source files based off IWYU's
+suggestions. `check` mode will simply check if there are any suggestions
+at all.
+
+`fix` mode will take a long time to run, as the tool needs to rerun any
+source in which an underlying header was changed to ensure things are not
+broken, and so it ends up recompiling the codebase several times over.
+
+For more information please refer to the script's `--help` option.
+
+# Example usage:
+
+First you must generate the `compile_commands.json` file via this command:
+
+```
+python3 buildscripts/scons.py --build-profile=compiledb compiledb
+```
+
+Next you can run the analysis:
+
+```
+python3 buildscripts/iwyu/run_iwyu_analysis.py
+```
+The default mode is fix mode, and it will start making changes to the code
+if any changes are found.
+
+# Debugging failures
+
+Occasionally the IWYU tool will run into problems where it is unable to suggest
+valid changes and the changes will cause things to break (not compile). When
+it hits a failure it will copy the source and all the headers that were used
+at the time of the compilation into a directory where the same command can be
+run to reproduce the error.
+
+You can examine the suggested changes in the source and headers and compare
+them to the working source tree. Then you can make corrective changes to allow
+IWYU to get past the failure.
+
+IWYU is not perfect and it makes several mistakes that a human can understand
+and fix appropriately.
+
+# Running the tests
+
+This tool includes its own end-to-end testing. The test directory includes
+subdirectories which contain source and IWYU configs to run the tool against.
+The tests will then compare the results to built-in expected results and fail
+if the tests are not producing the expected results.
+
+To run the tests use the command:
+
+```
+cd buildscripts/iwyu/test
+python3 run_tests.py
+```
diff --git a/buildscripts/iwyu/iwyu_config.yml b/buildscripts/iwyu/iwyu_config.yml
new file mode 100644
index 00000000000..2229242b427
--- /dev/null
+++ b/buildscripts/iwyu/iwyu_config.yml
@@ -0,0 +1,72 @@
+# options passed to IWYU
+iwyu_options:
+ - '--mapping_file=etc/iwyu_mapping.imp'
+ - '--no_fwd_decls'
+ - '--prefix_header_includes=add'
+ - '--transitive_includes_only'
+
+# options passed to the fix script
+fix_options:
+ - '--blank_lines'
+ - '--nocomments'
+ - '--noreorder'
+ - '--separate_project_includes=mongo'
+ - '--safe_headers'
+ - '--only_re=^src/mongo\/.*'
+ # TODO SERVER-77051 we will eventually turn this on once our codebase has been cleaned up.
+ # - '--nosafe_headers'
+
+# filename regexes of includes to swap for a no_include pragma in place.
+# Quotes and brackets are not part of the regex; quotes are always assumed
+# since this is targeting IWYU added headers
+no_includes:
+ # avoid boost craziness
+ - 'boost/.+/detail/.+'
+ - 'asio/impl/.+'
+ - 'boost/.+\.ipp'
+ # arch specific
+ - 'boost/predef/hardware/simd/x86.+'
+ - 'emmintrin\.h'
+ # we use a third party format which confuses IWYU
+ - 'format\.h'
+ # this is a link time symbol overloading thing not meant to be included
+ - 'libunwind-x86_64\.h'
+ # abuse of preprocessor
+ - 'mongo/db/namespace_string_reserved\.def\.h'
+
+# prefixes (non regex) to skip
+skip_files:
+ - 'src/third_party'
+ - 'build/'
+ - 'src/mongo/tools/mongo_tidy_checks'
+ - 'src/mongo/util/net' # causes linkage issues
+ # IWYU confused on forward declares
+ - 'src/mongo/db/exec/near.cpp'
+ - 'src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp'
+ - 'src/mongo/transport/asio/asio_transport_layer.cpp'
+ # causes IWYU to crash:
+ - 'src/mongo/db/update/update_internal_node.cpp'
+ - 'src/mongo/db/update/update_array_node.cpp'
+ - 'src/mongo/db/update/update_object_node.cpp'
+ - 'src/mongo/db/update/update_array_node_test.cpp'
+ - 'src/mongo/db/update/update_object_node_test.cpp'
+ - 'src/mongo/util/options_parser/environment.cpp'
+ - 'src/mongo/util/options_parser/option_section.cpp'
+
+# regexes of include paths to mark with a keep pragma
+# (the regex includes the quotes or angle brackets)
+keep_includes:
+ - '<fmt/printf\.h>'
+ - '<fmt/ranges\.h>'
+ - '<fmt/chrono\.h>'
+ - '<asio\.hpp>'
+ - '<boost/utility/in_place_factory\.hpp>'
+ - '<libunwind.h>'
+ - '<fstream>' # IWYU messes up template instantiation
+ - '"mongo/rpc/object_check\.h"'
+ - '"mongo/base/init\.h"'
+ - '"mongo/scripting/mozjs/wrapconstrainedmethod\.h"'
+ - '"mongo/dbtests/dbtests\.h"' # this is due to using statements in the header
+ - '"mongo/config\.h"'
+ - '"mongo/util/overloaded_visitor\.h"'
+ - '"mongo/db/query/optimizer/node\.h"'
diff --git a/buildscripts/iwyu/run_iwyu_analysis.py b/buildscripts/iwyu/run_iwyu_analysis.py
new file mode 100644
index 00000000000..85606056cc1
--- /dev/null
+++ b/buildscripts/iwyu/run_iwyu_analysis.py
@@ -0,0 +1,996 @@
+#!/usr/bin/env python3
+"""
+TOOL FUNCTIONAL DESCRIPTION.
+
+Currently the tool works by running IWYU on a subset of compile_commands.json
+(the ones we care about like checked in mongo source) and testing each change
+in a copy of the original source/header tree so that other compiles are not
+affected until it passes a normal compile itself. Due to header dependencies
+we must recompile the source files to catch any issues IWYU may have introduced
+with some dependent header change. Header dependencies do not form a DAG so
+we can not process sources in a deterministic fashion. The tool will loop
+through all the compilations until all dependents in a compilation are
+determined unchanged from the last time the compilation was performed.
+
+The general workflow used here is to run the tool until there are no changes
+(several hours on rhel-xxlarge) and fix the errors either in the tool config
+or as a manual human change in the code.
+
+TOOL TECHNICAL DESCRIPTION:
+
+Regarding the code layout, the main function sets up a thread pool executor
+and processes each source from the compile_commands. From there it runs a
+thread function and within that 5 parts (each its own function) for
+each source file:
+
+1. Skip if deps are unchanged
+2. Get the headers deps via -MMD
+3. Run IWYU
+4. Apply Fixes
+5. test compile, record new header deps if passed
+
+The tool uses mtime and MD5 hashing to know if any header dep has changed.
+
+"""
+
+import argparse
+import json
+import subprocess
+import tempfile
+import shlex
+import os
+import re
+import concurrent.futures
+import hashlib
+import atexit
+import traceback
+import threading
+import shutil
+import signal
+import sys
+import yaml
+import enum
+from dataclasses import dataclass, asdict
+from typing import Dict, List, Any, Optional, Callable, Union, Tuple
+
+from tqdm import tqdm
+from colorama import init as colorama_init
+from colorama import Fore
+
+colorama_init()
+
+parser = argparse.ArgumentParser(description='Run include what you use and test output')
+
+parser.add_argument('--compile-commands', metavar='FILE', type=str, default='compile_commands.json',
+ help='Path to the compile commands file to use.')
+parser.add_argument(
+ '--check', action='store_true', help=
+ 'Enables check mode, which does not apply fixes and only runs to see if any files produce IWYU changes. Exit 0 if no new changes detected.'
+)
+parser.add_argument(
+ '--config-file', metavar='FILE', type=str, default="", help=
+ 'Enables check mode, which does not apply fixes and only runs to see if any files produce IWYU changes. Exit 0 if no new changes detected.'
+)
+parser.add_argument(
+ '--iwyu-data', metavar='FILE', type=str, default='iwyu.dat',
+ help='Location of data used by IWYU, contains hash and status info about all files.')
+parser.add_argument(
+ '--keep-going', action='store_true', help=
+ 'Do not stop on errors, instead resubmit the job to try again later (after things may have been fixed elsewhere)'
+)
+parser.add_argument(
+ '--cycle-debugging', action='store_true', help=
+ 'Once a cycle has been detected, each directory tree for each step in the cycle will be saved to a .cycle directory.'
+)
+parser.add_argument('--verbose', action='store_true',
+ help='Prints more info about what is taking place.')
+parser.add_argument('--mongo-toolchain-bin-dir', type=str,
+ help='Which toolchain bin directory to use for this analysis.',
+ default='/opt/mongodbtoolchain/v4/bin')
+parser.add_argument(
+ '--start-ratio', type=float, help=
+ 'decimal value between 0 and 1 which indicates what starting ratio index of the total compile commands to run over, can not be greater than the --end-ratio.',
+ default=0.0)
+parser.add_argument(
+ '--end-ratio', type=float, help=
+ 'decimal value between 0 and 1 which indicates what ending ratio index of the total compile commands to run over, can not be less than the --start-ratio.',
+ default=1.0)
+command_line_args = parser.parse_args()
+
+# the current state of all files, contain the cmd_entry, hashes, successes
+IWYU_ANALYSIS_STATE: Dict[str, Any] = {}
+
+# the current state cycles being tracked
+IWYU_CYCLE_STATE: Dict[str, Any] = {}
+
+hash_lookup_locks: Dict[str, threading.Lock] = {}
+mtime_hash_lookup: Dict[str, Dict[str, Any]] = {}
+
+if command_line_args.config_file:
+ config_file = command_line_args.config_file
+else:
+ config_file = os.path.join(os.path.dirname(__file__), "iwyu_config.yml")
+
+with open(config_file, "r") as stream:
+ config = yaml.safe_load(stream)
+ for key, value in config.items():
+ if value is None:
+ config[key] = []
+
+IWYU_OPTIONS = config.get('iwyu_options', [])
+IWYU_FIX_OPTIONS = config.get('fix_options', [])
+NO_INCLUDES = config.get('no_includes', [])
+KEEP_INCLUDES = config.get('keep_includes', [])
+SKIP_FILES = tuple(config.get('skip_files', []))
+CYCLE_FILES: List[str] = []
+
+
+@dataclass
+class CompileCommand:
+ """
+ An entry from compile_commands.json.
+
+ The fields mirror the keys of one entry of the compilation database; the
+ rest of this script reads them as described on each field.
+ """
+
+ # path of the source file the command compiles; also used as the key into the analysis state
+ file: str
+ # the full compiler command line, executed through the shell by this script
+ command: str
+ # the 'directory' value of the entry (not read in the part of the script reviewed here)
+ directory: str
+ # output of the command; split with shlex to create the output directories
+ output: str
+
+
+class ResultType(enum.Enum):
+ """
+ Descriptions of enums.
+
+ ERROR: unexpected or unrecognized error cases
+ FAILED: the IWYU task for a given compile command entry failed
+ NO_CHANGE: the input header tree and source file have not changed since last time
+ NOT_RUNNING: sources which we intentionally skip running IWYU all together
+ RESUBMIT: the IWYU task failed, but it may work later after other header changes
+ SUCCESS: the IWYU task for a source file has succeeded
+ """
+
+ ERROR = enum.auto()
+ FAILED = enum.auto()
+ NO_CHANGE = enum.auto()
+ NOT_RUNNING = enum.auto()
+ RESUBMIT = enum.auto()
+ SUCCESS = enum.auto()
+
+
+TOOLCHAIN_DIR = command_line_args.mongo_toolchain_bin_dir
+SHUTDOWN_FLAG = False
+CLANG_INCLUDES = None
+IWYU_OPTIONS = [val for pair in zip(['-Xiwyu'] * len(IWYU_OPTIONS), IWYU_OPTIONS) for val in pair]
+if NO_INCLUDES:
+ NO_INCLUDE_REGEX = re.compile(r'^\s*#include\s+\"(' + '|'.join(NO_INCLUDES) + ')\"')
+if KEEP_INCLUDES:
+ KEEP_INCLUDE_REGEX = re.compile(r'^\s*#include\s+(' + '|'.join(KEEP_INCLUDES) + ')')
+CHANGED_FILES_REGEX = re.compile(r"^The\sfull\sinclude-list\sfor\s(.+):$", re.MULTILINE)
+
+
+def printer(message: str) -> None:
+ """
+ Prints output as appropriate.
+
+ We don't print output if we are shutting down because the logs will
+ explode and original error will be hard to locate.
+ """
+
+ if not SHUTDOWN_FLAG or command_line_args.verbose:
+ tqdm.write(str(message))
+
+
+def debug_printer(message: str) -> None:
+ """Print each step in the processing of IWYU."""
+
+ if command_line_args.verbose:
+ tqdm.write(str(message))
+
+
+def failed_return() -> ResultType:
+ """A common method to allow the processing to continue even after some file fails."""
+
+ if command_line_args.keep_going:
+ return ResultType.RESUBMIT
+ else:
+ return ResultType.FAILED
+
+
+def in_project_root(file: str) -> bool:
+ """
+ Return true if the file is in the project root.
+
+ This is assuming the project root is the same location
+ as the compile_commands.json file (the format of compile_commands.json
+ expects this as well).
+ """
+
+ return os.path.abspath(file).startswith(
+ os.path.abspath(os.path.dirname(command_line_args.compile_commands)))
+
+
+def copy_error_state(cmd_entry: CompileCommand, test_dir: str,
+ dir_ext: str = '.iwyu_test_dir') -> Optional[str]:
+ """
+ When we fail, we want to copy the current state of the temp dir.
+
+ This is so that the command that was used can be replicated and rerun,
+ primarily for debugging purposes.
+ """
+
+ # we never use a test_dir in check mode, since no files are copied in that mode.
+ if command_line_args.check:
+ return None
+
+ # make a directory in the output location that we can store the state of the the
+ # header dep and source file the compile command was run with, delete old results
+ base, _ = os.path.splitext(cmd_entry.output)
+ if os.path.exists(base + dir_ext):
+ shutil.rmtree(base + dir_ext)
+ os.makedirs(base + dir_ext, exist_ok=True)
+ basedir = os.path.basename(test_dir)
+ error_state_dir = os.path.join(base + dir_ext, basedir)
+ shutil.copytree(test_dir, error_state_dir)
+ return error_state_dir
+
+
+def calc_hash_of_file(file: str) -> str:
+ """
+ Calculate the hash of a file. Use mtime as well.
+
+ If the mtime is unchanged, don't do IO, just look up the last hash.
+ """
+
+ # we need to lock on specific file io because GIL does not cover system io, so two threads
+ # could be doing io on the same file at the same time.
+ if file not in hash_lookup_locks:
+ hash_lookup_locks[file] = threading.Lock()
+ with hash_lookup_locks[file]:
+ if file in mtime_hash_lookup and os.path.getmtime(file) == mtime_hash_lookup[file]['mtime']:
+ return mtime_hash_lookup[file]['hash']
+ else:
+ hash_val = hashlib.md5(open(file, 'rb').read()).hexdigest()
+ mtime_hash_lookup[file] = {'mtime': os.path.getmtime(file), 'hash': hash_val}
+ return hash_val
+
+
+def find_no_include(line: str, lines: List[str], output_lines: List[str]) -> bool:
+ """
+ We need to regex the line to see if it includes an include that matches our NO_INCLUDE_REGEX.
+
+ If so then we do not include that line
+ when we rewrite the file, and instead we add a IWYU no_include pragma inplace
+ """
+
+ no_include_header_found = False
+ no_include_header = re.findall(NO_INCLUDE_REGEX, line)
+
+ if no_include_header:
+ no_include_header_found = True
+ no_include_line = f'// IWYU pragma: no_include "{no_include_header[0]}"\n'
+ if no_include_line not in lines:
+ output_lines.append(no_include_line)
+ return no_include_header_found
+
+
+def add_pragmas(source_files: List[str]):
+ """
+ We automate some of the pragmas so there is not so much manual work.
+
+ There are general cases for some of the pragmas. In this case we open the target
+ source/header, search via regexes for specific includes we care about, then add
+ the pragma comments as necessary.
+ """
+
+ for source_file in source_files:
+
+ # before we run IWYU, we take a guess at the likely header by swapping .cpp for .h
+ # so it may not be a real header. After IWYU runs we know exactly where to add the pragmas
+ # in case we got it wrong the first time around
+ if not os.path.exists(source_file):
+ continue
+
+ # we load in the file content operate on it, and then write it back out
+ output_lines: List[str] = []
+ with open(source_file, 'r') as fin:
+ file_lines = fin.readlines()
+ for line in file_lines:
+
+ if NO_INCLUDES and find_no_include(line, file_lines, output_lines):
+ continue
+
+ if KEEP_INCLUDES and re.search(KEEP_INCLUDE_REGEX,
+ line) and '// IWYU pragma: keep' not in line:
+
+ output_lines.append(line.strip() + " // IWYU pragma: keep\n")
+ continue
+
+ output_lines.append(line)
+
+ with open(source_file, 'w') as fout:
+ for line in output_lines:
+ fout.write(line)
+
+
+def recalc_hashes(deps: List[str], change_dir: Optional[str] = None) -> Dict[str, Any]:
+ """
+ We calculate the hashes from the header dep list generated by the compiler.
+
+ We also create cumulative hash for convenance.
+
+ Some cases we are operating a test directory, but deps are referenced as if they are
+ in the project root. The change_dir option here allows us to calc the the hashes from
+ the test directory we may be working in, but still record the deps files in a compat
+ fashion with other processes that work out of project root, e.g. testing if there was a
+ change from last time.
+ """
+
+ hashes: Dict[str, Any] = {'deps': {}}
+ full_hash = hashlib.new('md5')
+ for dep in sorted(list(deps)):
+ if not in_project_root(dep):
+ continue
+ if change_dir:
+ orig_dep = dep
+ dep = os.path.join(change_dir, dep)
+ dep_hash = calc_hash_of_file(dep)
+ if change_dir:
+ dep = orig_dep
+ full_hash.update(dep_hash.encode('utf-8'))
+ hashes['deps'][dep] = dep_hash
+ hashes['full_hash'] = full_hash.hexdigest()
+ return hashes
+
+
+def setup_test_dir(cmd_entry: CompileCommand, test_dir: str) -> List[str]:
+ """
+ Here we are copying the source and required header tree from the main source tree.
+
+ Returns the associate source and header that were copied into the test dir.
+
+ We want an isolated location to perform analysis and apply changes so everything is not
+ clashing. At this point we don't know for sure what header IWYU is going to associate with the source
+ but for mongo codebase, 99.9% of the time its just swap the .cpp for .h. We need this to apply
+ some pragma to keep IWYU from removing headers it doesn't understand (cross platform or
+ third party like boost or asio). The pragmas are harmless in and of themselves so adding the
+ mistakenly in the 0.1% of the time is negligible.
+ """
+
+ original_sources = [
+ orig_source for orig_source in [cmd_entry.file,
+ os.path.splitext(cmd_entry.file)[0] + '.h']
+ if os.path.exists(orig_source)
+ ]
+ test_source_files = [os.path.join(test_dir, source_file) for source_file in original_sources]
+ dep_headers = [dep for dep in IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys()]
+
+ # copy each required header from our source tree into our test dir
+ # this does cost some time, but the alternative (everything operating in the real source tree)
+ # was much longer due to constant failures.
+ for source_file in dep_headers + ['etc/iwyu_mapping.imp']:
+ if in_project_root(source_file):
+ os.makedirs(os.path.join(test_dir, os.path.dirname(source_file)), exist_ok=True)
+ shutil.copyfile(source_file, os.path.join(test_dir, source_file))
+
+ # need to create dirs for outputs
+ for output in shlex.split(cmd_entry.output):
+ os.makedirs(os.path.join(test_dir, os.path.dirname(output)), exist_ok=True)
+
+ return test_source_files
+
+
+def get_clang_includes() -> List[str]:
+ """
+ IWYU needs some extra help to know what default includes clang is going to bring in when it normally compiles.
+
+ The query reliably gets the include dirs that would be used in normal compiles. We cache and reuse the result
+ so the subprocess only runs once.
+ """
+ global CLANG_INCLUDES # pylint: disable=global-statement
+ if CLANG_INCLUDES is None:
+ clang_includes = subprocess.getoutput(
+ f"{TOOLCHAIN_DIR}/clang++ -Wp,-v -x c++ - -fsyntax-only < /dev/null 2>&1 | sed -e '/^#include <...>/,/^End of search/{{ //!b }};d'"
+ ).split('\n')
+ clang_includes = ['-I' + include.strip() for include in clang_includes]
+ CLANG_INCLUDES = clang_includes
+ return CLANG_INCLUDES
+
+
+def write_cycle_diff(source_file: str, cycle_dir: str, latest_hashes: Dict[str, Any]) -> None:
+ """
+ Write out the diffs between the last iteration and the latest iteration.
+
+ The file contains the hash for before and after for each file involved in the compilation.
+ """
+
+ with open(os.path.join(cycle_dir, 'hashes_diff.txt'), 'w') as out:
+ dep_list = set(
+ list(IWYU_ANALYSIS_STATE[source_file]['hashes']['deps'].keys()) +
+ list(latest_hashes['deps'].keys()))
+ not_found_str = "not found" + (" " * 23)
+ for dep in sorted(dep_list):
+ out.write(
+ f"Original: {IWYU_ANALYSIS_STATE[source_file]['hashes']['deps'].get(dep, not_found_str)}, Latest: {latest_hashes['deps'].get(dep, not_found_str)} - {dep}\n"
+ )
+
+
+def check_for_cycles(cmd_entry: CompileCommand, latest_hashes: Dict[str, Any],
+ test_dir: str) -> Optional[ResultType]:
+ """
+ IWYU can induce cycles so we should check our previous results to see if a cycle has occurred.
+
+ These cycles can happen if a header change induces some other header change which then inturn induces
+ the original header change. These cycles are generally harmless and are easily broken with a keep
+ pragma but finding what files are induces the cycle is the challenge.
+
+ With cycle debug mode enabled, the entire header tree is saved for each iteration in the cycle so
+ all files can be fully examined.
+ """
+
+ if cmd_entry.file not in IWYU_CYCLE_STATE:
+ IWYU_CYCLE_STATE[cmd_entry.file] = {
+ 'cycles': [],
+ }
+
+ if latest_hashes['full_hash'] in IWYU_CYCLE_STATE[cmd_entry.file]['cycles']:
+ if command_line_args.cycle_debugging:
+ if 'debug_cycles' not in IWYU_CYCLE_STATE[cmd_entry.file]:
+ IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles'] = {}
+
+ IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles'][
+ latest_hashes['full_hash']] = latest_hashes
+
+ cycle_dir = copy_error_state(
+ cmd_entry, test_dir, dir_ext=
+ f".{latest_hashes['full_hash']}.cycle{len(IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles'])}"
+ )
+ write_cycle_diff(cmd_entry.file, cycle_dir, latest_hashes)
+ if latest_hashes['full_hash'] not in IWYU_CYCLE_STATE[cmd_entry.file]['debug_cycles']:
+ printer(f"{Fore.YELLOW}[5] - Cycle Found!: {cmd_entry.file}{Fore.RESET}")
+ else:
+ printer(f"{Fore.RED}[5] - Cycle Done! : {cmd_entry.file}{Fore.RESET}")
+ return failed_return()
+ else:
+ printer(f"{Fore.RED}[5] - Cycle Found!: {cmd_entry.file}{Fore.RESET}")
+ CYCLE_FILES.append(cmd_entry.file)
+ return ResultType.SUCCESS
+ else:
+ IWYU_CYCLE_STATE[cmd_entry.file]['cycles'].append(latest_hashes['full_hash'])
+
+ return None
+
+
+def write_iwyu_data() -> None:
+ """Store the data we have acquired during this run so we can resume at the same spot on subsequent runs."""
+
+ # There might be faster ways to store this like serialization or
+ # what not, but having human readable json is good for debugging.
+ # on a full build this takes around 10 seconds to write out.
+ if IWYU_ANALYSIS_STATE:
+ try:
+ # atomic move operation prevents ctrl+c mashing from
+ # destroying everything, at least we can keep the original
+ # data safe from emotional outbursts.
+ with tempfile.NamedTemporaryFile() as temp:
+ with open(temp.name, 'w') as iwyu_data_file:
+ json.dump(IWYU_ANALYSIS_STATE, iwyu_data_file, sort_keys=True, indent=4)
+ shutil.move(temp.name, command_line_args.iwyu_data)
+ except FileNotFoundError as exc:
+ if temp.name in str(exc):
+ pass
+
+
+def need_to_process(cmd_entry: CompileCommand,
+ custom_printer: Callable[[str], None] = printer) -> Optional[ResultType]:
+ """
+ The first step in the first step for processing a given source file.
+
+ We have a list of skip prefixes, for example build or third_party, but others can be added.
+
+ If it is a file we are not skipping, then we check if we have already done the work by calculating the
+ hashes and seeing if what we recorded last time has changed.
+ """
+
+ if cmd_entry.file.startswith(
+ SKIP_FILES) or cmd_entry.file in CYCLE_FILES or '/conftest_' in cmd_entry.file:
+ custom_printer(f"{Fore.YELLOW}[5] - Not running!: {cmd_entry.file}{Fore.RESET}")
+ return ResultType.NOT_RUNNING
+
+ if IWYU_ANALYSIS_STATE.get(cmd_entry.file):
+ hashes = recalc_hashes(IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys())
+
+ # we only skip if the matching mode was successful last time, otherwise we assume we need to rerun
+ mode_success = 'CHECK' if command_line_args.check else 'FIX'
+ if command_line_args.verbose:
+ diff_files = list(
+ set(hashes['deps'].keys()).symmetric_difference(
+ set(IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys())))
+ if diff_files:
+ msg = f"[1] Need to process {cmd_entry.file} because different files:\n"
+ for file in diff_files:
+ msg += f'{file}\n'
+ debug_printer(msg)
+ for file in IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'].keys():
+ if file in hashes['deps'] and hashes['deps'][file] != IWYU_ANALYSIS_STATE[
+ cmd_entry.file]['hashes']['deps'][file]:
+ debug_printer(
+ f"[1] Need to process {cmd_entry.file} because hash changed:\n{file}: {hashes['deps'][file]}\n{file}: {IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'][file]}"
+ )
+
+ if hashes['full_hash'] == IWYU_ANALYSIS_STATE[
+ cmd_entry.file]['hashes']['full_hash'] and mode_success in IWYU_ANALYSIS_STATE[
+ cmd_entry.file].get('success', []):
+ custom_printer(f"{Fore.YELLOW}[5] - No Change! : {cmd_entry.file}{Fore.RESET}")
+ return ResultType.NO_CHANGE
+
+ return None
+
+
+def calc_dep_headers(cmd_entry: CompileCommand) -> Optional[ResultType]:
+ """
+ The second step in the IWYU process.
+
+ We need to get a list of headers which are dependencies so we can copy them to an isolated
+ working directory (so parallel IWYU changes don't break us). We will switch on preprocessor
+ for faster generation of the dep file.
+
+ Once we have the deps list, we parse it and calc the hashes of the deps.
+ """
+
+ try:
+ with tempfile.NamedTemporaryFile() as depfile:
+
+ # first time we could be executing a real command so we make sure the dir
+ # so the compiler is not mad
+ outputs = shlex.split(cmd_entry.output)
+ for output in outputs:
+ out_dir = os.path.dirname(output)
+ if out_dir:
+ os.makedirs(out_dir, exist_ok=True)
+
+ # setup up command for fast depfile generation
+ cmd = cmd_entry.command
+ cmd += f' -MD -MF {depfile.name}'
+ cmd = cmd.replace(' -c ', ' -E ')
+ debug_printer(f"[1] - Getting Deps: {cmd_entry.file}")
+
+ try:
+ deps_proc = subprocess.run(cmd, shell=True, capture_output=True, text=True,
+ timeout=300)
+ except subprocess.TimeoutExpired:
+ deps_proc = None
+ pass
+
+ # if successful, record the latest deps with there hashes, otherwise try again later
+ if deps_proc is None or deps_proc.returncode != 0:
+ printer(f"{Fore.RED}[5] - Deps Failed!: {cmd_entry.file}{Fore.RESET}")
+ printer(deps_proc.stderr)
+ return ResultType.RESUBMIT
+ else:
+ with open(depfile.name) as deps:
+ deps_str = deps.read()
+ deps_str = deps_str.replace('\\\n', '').strip()
+
+ hashes = recalc_hashes(shlex.split(deps_str)[1:])
+ if not IWYU_ANALYSIS_STATE.get(cmd_entry.file):
+ IWYU_ANALYSIS_STATE[cmd_entry.file] = asdict(cmd_entry)
+ IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes'] = hashes
+ IWYU_ANALYSIS_STATE[cmd_entry.file]['success'] = []
+
+ # if the dep command failed the context will through an execption, we will ignore just
+ # that case
+ except FileNotFoundError as exc:
+ traceback.print_exc()
+ if depfile.name in str(exc):
+ pass
+
+ return None
+
+
+def execute_iwyu(cmd_entry: CompileCommand, test_dir: str) -> Union[ResultType, bytes]:
+    """
+    Step 3 of the IWYU analysis; check mode ends after this step.
+
+    Runs the include-what-you-use binary in place of the compiler for the given compile
+    command, using test_dir as the working directory. In fix mode that is an isolated
+    directory holding a copy of the required header tree; in check mode it is the project
+    root (the real source tree).
+
+    Returns the filtered IWYU report encoded as UTF-8 bytes. When the compiler is not the
+    expected clang++ this returns ResultType.FAILED, and when IWYU itself reports a failure
+    it returns whatever failed_return() yields.
+    """
+
+    compiler = f'{TOOLCHAIN_DIR}/clang++'
+
+    # only a pure clang++ build is supported
+    if not cmd_entry.command.startswith(compiler):
+        printer("unexpected compiler:")
+        printer(cmd_entry.command)
+        return ResultType.FAILED
+
+    # replace the compiler with the IWYU binary and append the extra IWYU arguments
+    compiler_args = cmd_entry.command[len(compiler):]
+    cmd = f'{TOOLCHAIN_DIR}/include-what-you-use' + compiler_args
+    cmd += ' ' + ' '.join(get_clang_includes())
+    cmd += ' ' + ' '.join(IWYU_OPTIONS)
+
+    # mimic the PATH we normally use in our build
+    env = os.environ.copy()
+    env['PATH'] += f':{TOOLCHAIN_DIR}'
+
+    debug_printer(f'[2] - Running IWYU: {cmd_entry.file}')
+    proc = subprocess.run(cmd, shell=True, env=env, capture_output=True, cwd=test_dir)
+    raw_report = proc.stderr.decode('utf-8')
+    exit_code = proc.returncode
+
+    # Even with --no_fwd_decls IWYU sometimes recommends forward declarations, and some of
+    # them are wrong and break compilation, so those lines are dropped from the report.
+    def is_fwd_declare(text):
+        if text.endswith(':') or text.startswith(('#include ', '-')):
+            return False
+        return 'class ' in text or 'struct ' in text
+
+    stripped_lines = [raw.strip() for raw in raw_report.split('\n')]
+    iwyu_output = '\n'.join(text for text in stripped_lines if not is_fwd_declare(text))
+
+    # IWYU has unusual exit codes, where a value >= 2 is considered success:
+    # https://github.com/include-what-you-use/include-what-you-use/blob/clang_12/iwyu_globals.h#L27-L34
+    if command_line_args.check and exit_code != 2:
+        printer(f"{Fore.RED}[2] - IWYU Failed: {cmd_entry.file}{Fore.RESET}")
+        if exit_code >= 2:
+            printer(f"changes required: {exit_code - 2}")
+        else:
+            printer(f"exited with error: {exit_code}")
+        printer(iwyu_output)
+        return failed_return()
+    elif exit_code < 2:
+        printer(f'{Fore.RED}[2] - IWYU Failed : {cmd_entry.file}{Fore.RESET}')
+        printer(cmd)
+        printer(str(exit_code))
+        printer(raw_report)
+        copy_error_state(cmd_entry, test_dir)
+        return failed_return()
+
+    # keep the report next to the object file for debugging or later inspection
+    report_path = os.path.splitext(cmd_entry.output)[0] + '.iwyu'
+    with open(report_path, 'w') as iwyu_out:
+        iwyu_out.write(iwyu_output)
+
+    return iwyu_output.encode('utf-8')
+
+
+def apply_fixes(cmd_entry: CompileCommand, iwyu_output: bytes,
+                test_dir: str) -> Optional[ResultType]:
+    """
+    Step 4 of the IWYU analysis.
+
+    Pipes the IWYU report into the toolchain's fix_includes.py script, which is run with
+    test_dir as its working directory.
+
+    Returns ResultType.RESUBMIT when the script does not finish within 180 seconds and None
+    otherwise. The script's exit status and output are not inspected.
+    """
+    fix_script = f'{TOOLCHAIN_DIR}/fix_includes.py'
+    fix_cmd = [f'{sys.executable}', fix_script] + IWYU_FIX_OPTIONS
+
+    debug_printer(f'[3] - Apply fixes : {cmd_entry.file}')
+    timed_out = False
+    try:
+        subprocess.run(fix_cmd, capture_output=True, input=iwyu_output, timeout=180,
+                       cwd=test_dir)
+    except subprocess.TimeoutExpired:
+        timed_out = True
+
+    if timed_out:
+        printer(f"{Fore.RED}[5] - Apply failed: {cmd_entry.file}{Fore.RESET}")
+        return ResultType.RESUBMIT
+
+    return None
+
+
+def test_compile(cmd_entry: CompileCommand, test_dir: str) -> Optional[ResultType]:
+    """
+    Step 5 in the IWYU analysis and the last step for fix mode.
+
+    Runs the normal compile command inside test_dir. Success is only reported when the
+    compiler actually finished with exit status 0, because the caller copies the modified
+    files back into the real source tree on success.
+
+    Returns:
+        ResultType.SUCCESS when the compile passed and the fresh dependency hashes were
+        recorded in IWYU_ANALYSIS_STATE.
+        ResultType.RESUBMIT when the compile timed out (300 seconds) or raised MemoryError,
+        so the file can be retried later instead of being treated as verified.
+        The value of failed_return() when the compiler exited non-zero.
+        The value of check_for_cycles() when it returns something truthy.
+        None when a FileNotFoundError was raised while handling the dependency file.
+    """
+
+    try:
+        with tempfile.NamedTemporaryFile() as depfile:
+            debug_printer(f"[4] - Test compile: {cmd_entry.file}")
+
+            # we want to capture the header deps again because IWYU may have changed them
+            cmd = cmd_entry.command
+            cmd += f' -MMD -MF {depfile.name}'
+            try:
+                p3 = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=300,
+                                    cwd=test_dir)
+            except (subprocess.TimeoutExpired, MemoryError):
+                p3 = None
+
+            # The compile never produced a result. It must not fall through to the success
+            # path: the depfile would be empty and the unverified changes would be accepted.
+            if p3 is None:
+                printer(
+                    f"{Fore.RED}[5] - Test compile did not finish: {cmd_entry.file}{Fore.RESET}")
+                return ResultType.RESUBMIT
+
+            # our test compile has failed so we need to report and setup for debug
+            if p3.returncode != 0:
+                printer(f"{Fore.RED}[5] - IWYU Failed!: {cmd_entry.file}{Fore.RESET}")
+                printer(f"{cmd}")
+                printer(f"{p3.stderr}")
+                copy_error_state(cmd_entry, test_dir)
+                return failed_return()
+
+            # calculate the hashes of the deps used to create this successful compile;
+            # the first token of the depfile is the make target, so it is skipped.
+            with open(depfile.name) as deps:
+                deps_str = deps.read()
+            deps_str = deps_str.replace('\\\n', '').strip()
+            hashes = recalc_hashes(shlex.split(deps_str)[1:], change_dir=test_dir)
+
+            if result := check_for_cycles(cmd_entry, hashes, test_dir):
+                return result
+
+            IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes'] = hashes
+            if 'FIX' not in IWYU_ANALYSIS_STATE[cmd_entry.file]['success']:
+                IWYU_ANALYSIS_STATE[cmd_entry.file]['success'].append('FIX')
+            printer(f"{Fore.GREEN}[5] - IWYU Success: {cmd_entry.file}{Fore.RESET}")
+            return ResultType.SUCCESS
+
+    # the depfile may be missing (for example if it was never generated), so a
+    # FileNotFoundError is tolerated here.
+    # NOTE(review): every FileNotFoundError is swallowed, not only ones naming the depfile,
+    # and the function then returns None -- confirm that callers handle a None result.
+    except FileNotFoundError as exc:
+        if depfile.name in str(exc):
+            pass
+
+    return None
+
+
+def intialize_deps(cmd_entry: CompileCommand) -> Tuple[ResultType, CompileCommand]:
+    """
+    Gather the header dependencies of one compile command ahead of a fix-mode run.
+
+    The dependency counts let the main loop process the files with the fewest dependencies
+    first, so changes made to shared headers are already in place when the dependency-heavy
+    files are analyzed and less rework is needed. Knowing up front which files are skipped
+    also keeps the progress bar accurate.
+
+    Returns a (result, cmd_entry) pair so the caller can match the result to its command.
+    """
+
+    # step 1: files that do not need processing are reported straight away
+    skip_result = need_to_process(cmd_entry, custom_printer=debug_printer)
+    if skip_result:
+        return skip_result, cmd_entry
+
+    # Dependencies recorded by a previous run are a good enough indicator of how dependency
+    # heavy the file is, so prefer them over invoking the compiler again.
+    try:
+        recorded_deps = IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps']
+    except KeyError:
+        recorded_deps = ()
+    if len(recorded_deps):
+        return ResultType.SUCCESS, cmd_entry
+
+    # nothing recorded yet: compute the dependencies now
+    deps_result = calc_dep_headers(cmd_entry)
+    return (deps_result or ResultType.SUCCESS), cmd_entry
+
+
+def check_iwyu(cmd_entry: CompileCommand) -> ResultType:
+    """
+    Thread function for check mode, submitted to the main thread pool executor.
+
+    Runs steps 1 through 3 of the analysis against the real source tree and reports success
+    when IWYU requires no changes.
+    """
+
+    # steps 1 and 2: either one may end the analysis early with its own result
+    for early_step in (need_to_process, calc_dep_headers):
+        early_result = early_step(cmd_entry)
+        if early_result:
+            return early_result
+
+    # step 3: run IWYU in the current directory; a ResultType here signals a failure
+    iwyu_out = execute_iwyu(cmd_entry, '.')
+    if isinstance(iwyu_out, ResultType):
+        return iwyu_out
+
+    # success: record that this file passed the check
+    printer(f"{Fore.GREEN}[2] - IWYU Success: {cmd_entry.file}{Fore.RESET}")
+    passed_modes = IWYU_ANALYSIS_STATE[cmd_entry.file]['success']
+    if "CHECK" not in passed_modes:
+        passed_modes.append('CHECK')
+    return ResultType.SUCCESS
+
+
+def fix_iwyu(cmd_entry: CompileCommand) -> ResultType:
+    """
+    Thread function for fix mode, submitted to the main thread pool executor.
+
+    Runs steps 1 through 5 of the analysis. The changes are made in a temporary directory
+    and the touched files are moved back into the real source tree only when the original
+    compile command still succeeds after IWYU's changes.
+    """
+
+    # step 1
+    skip_result = need_to_process(cmd_entry)
+    if skip_result:
+        return skip_result
+
+    # step 2
+    deps_result = calc_dep_headers(cmd_entry)
+    if deps_result:
+        return deps_result
+
+    with tempfile.TemporaryDirectory() as test_dir:
+
+        # work in an isolated test dir so concurrent jobs do not conflict with each other
+        test_source_files = setup_test_dir(cmd_entry, test_dir)
+
+        # a first round of pragmas to make sure IWYU doesn't fail or remove things we dont want
+        add_pragmas(test_source_files)
+
+        # step 3
+        iwyu_out = execute_iwyu(cmd_entry, test_dir)
+        if isinstance(iwyu_out, ResultType):
+            return iwyu_out
+
+        # the report names exactly which files IWYU operated on; only those are copied back
+        report_text = iwyu_out.decode('utf-8')
+        changed_files = []
+        for reported in re.findall(CHANGED_FILES_REGEX, report_text):
+            if in_project_root(reported):
+                changed_files.append(os.path.join(test_dir, reported))
+        newly_changed = [path for path in changed_files if path not in test_source_files]
+        test_source_files += newly_changed
+
+        # step 4
+        fix_result = apply_fixes(cmd_entry, iwyu_out, test_dir)
+        if fix_result:
+            return fix_result
+
+        # a final round of pragmas for the next time this is run through IWYU
+        add_pragmas(test_source_files)
+
+        # step 5
+        result = test_compile(cmd_entry, test_dir)
+        if result == ResultType.SUCCESS:
+            # strip the "<test_dir>/" prefix to get the destination path of each file
+            prefix_len = len(test_dir) + 1
+            for file in test_source_files:
+                if os.path.exists(file):
+                    shutil.move(file, file[prefix_len:])
+
+        return result
+
+
+def run_iwyu(cmd_entry: CompileCommand) -> Tuple[ResultType, CompileCommand]:
+    """Run the analysis in the mode chosen on the command line and pair the result with its command."""
+
+    mode_runner = check_iwyu if command_line_args.check else fix_iwyu
+    return mode_runner(cmd_entry), cmd_entry
+
+
+def main() -> None:
+    """
+    Entry point of the IWYU analysis.
+
+    Loads the state of prior runs and the compilation database, selects the slice of compile
+    commands given by the start/end ratios, and runs the analysis jobs on a thread pool.
+    In fix mode the header dependencies are gathered first so the jobs can be ordered from
+    fewest to most dependencies. Full passes are repeated until a pass reports no further
+    changes. Any failure shuts the pool down and exits with status 1.
+    """
+    global IWYU_ANALYSIS_STATE, SHUTDOWN_FLAG  # pylint: disable=global-statement
+    atexit.register(write_iwyu_data)
+
+    with concurrent.futures.ThreadPoolExecutor(
+            max_workers=len(os.sched_getaffinity(0)) + 4) as executor:
+
+        # ctrl+c: try to shut down as fast as possible.
+        def sigint_handler(the_signal, frame):
+            executor.shutdown(wait=False, cancel_futures=True)
+            sys.exit(1)
+
+        signal.signal(signal.SIGINT, sigint_handler)
+
+        # load in any data from prior runs
+        if os.path.exists(command_line_args.iwyu_data):
+            with open(command_line_args.iwyu_data) as iwyu_data_file:
+                IWYU_ANALYSIS_STATE = json.load(iwyu_data_file)
+
+        # load in the compile commands
+        with open(command_line_args.compile_commands) as compdb_file:
+            compiledb = [CompileCommand(**json_data) for json_data in json.load(compdb_file)]
+
+        # assert the generated source code has been generated
+        for cmd_entry in compiledb:
+            if cmd_entry.file.endswith('_gen.cpp') and not os.path.exists(cmd_entry.file):
+                printer(f"{Fore.RED}[5] - Missing Gen!: {cmd_entry.file}{Fore.RESET}")
+                printer(
+                    f"Error: missing generated file {cmd_entry.file}, make sure generated-sources are generated."
+                )
+                sys.exit(1)
+
+        # translate the start/end ratios into indexes clamped to [0, total_cmds]
+        total_cmds = len(compiledb)
+        start_index = min(max(int(total_cmds * command_line_args.start_ratio), 0), total_cmds)
+        end_index = min(max(int(total_cmds * command_line_args.end_ratio), 0), total_cmds)
+
+        if start_index == end_index:
+            print(f"Error: start_index and end_index are the same: {start_index}")
+            sys.exit(1)
+        if start_index > end_index:
+            print(
+                f"Error: start_index {start_index} can not be greater than end_index {end_index}"
+            )
+            sys.exit(1)
+
+        print(f"Analyzing compile commands from {start_index} to {end_index}.")
+        compiledb = compiledb[start_index:end_index]
+        if not command_line_args.check:
+            # We can optimize the order we process things by processing source files
+            # with the least number of dependencies first. This is a cost up front
+            # but will result in huge gains in the amount of re-processing to be done.
+            printer("Getting Initial Header Dependencies...")
+            cmd_entry_list = []
+            try:
+                with tqdm(total=len(compiledb), disable=None) as pbar:
+
+                    # create and run the dependency check jobs
+                    future_cmd = {
+                        executor.submit(intialize_deps, cmd_entry): cmd_entry
+                        for cmd_entry in compiledb
+                    }
+                    for future in concurrent.futures.as_completed(future_cmd):
+                        result, cmd_entry = future.result()
+                        if result != ResultType.NOT_RUNNING:
+                            cmd_entry_list.append(cmd_entry)
+                        pbar.update(1)
+            except Exception:
+                SHUTDOWN_FLAG = True
+                traceback.print_exc()
+                executor.shutdown(wait=True, cancel_futures=True)
+                sys.exit(1)
+        else:
+            cmd_entry_list = compiledb
+
+        try:
+
+            # this loop keeps looping until a full run produces no new changes.
+            changes_left = True
+            while changes_left:
+                changes_left = False
+
+                with tqdm(total=len(cmd_entry_list), disable=None) as pbar:
+
+                    # sort key: number of recorded dependencies, 0 when none are recorded
+                    def dep_sorted(cmd_entry):
+                        try:
+                            return len(IWYU_ANALYSIS_STATE[cmd_entry.file]['hashes']['deps'])
+                        except KeyError:
+                            return 0
+
+                    # create and run the IWYU jobs
+                    future_cmd = {
+                        executor.submit(run_iwyu, cmd_entry): cmd_entry
+                        for cmd_entry in sorted(cmd_entry_list, key=dep_sorted)
+                    }
+
+                    # process the results
+                    for future in concurrent.futures.as_completed(future_cmd):
+                        result, cmd_entry = future.result()
+
+                        # any result which implies there could be changes required sets the
+                        # next loop
+                        if result not in (ResultType.NO_CHANGE, ResultType.NOT_RUNNING):
+                            changes_left = True
+
+                        # if a file is considered done for this loop, update the status bar
+                        if result in [
+                                ResultType.SUCCESS, ResultType.NO_CHANGE, ResultType.NOT_RUNNING
+                        ]:
+                            pbar.update(1)
+                        # resubmit jobs which may have a better chance to run later
+                        elif result == ResultType.RESUBMIT:
+                            executor.submit(run_iwyu, cmd_entry)
+                        # handle a failure case, the exception quickly drops us out of this loop.
+                        else:
+                            SHUTDOWN_FLAG = True
+                            # a worker may hand back None instead of a ResultType, so do not
+                            # assume the result has a .name attribute
+                            result_name = getattr(result, 'name', str(result))
+                            tqdm.write(
+                                f"{result_name}: Shutting down other threads, please be patient."
+                            )
+                            # cmd_entry is a CompileCommand object, so the file is read as an
+                            # attribute rather than by subscript
+                            raise Exception(f'Shutdown due to {result_name} {cmd_entry.file}')
+
+        except Exception:
+            SHUTDOWN_FLAG = True
+            traceback.print_exc()
+            executor.shutdown(wait=True, cancel_futures=True)
+            sys.exit(1)
+        finally:
+            if CYCLE_FILES:
+                printer(f"{Fore.YELLOW} Cycles detected:")
+                for file in CYCLE_FILES:
+                    printer(f' {file}')
+
+
+main()
diff --git a/buildscripts/iwyu/test/basic/a.h b/buildscripts/iwyu/test/basic/a.h
new file mode 100644
index 00000000000..ad792ace34b
--- /dev/null
+++ b/buildscripts/iwyu/test/basic/a.h
@@ -0,0 +1 @@
+#include "b.h"
diff --git a/buildscripts/iwyu/test/basic/b.cpp b/buildscripts/iwyu/test/basic/b.cpp
new file mode 100644
index 00000000000..dcbc8627764
--- /dev/null
+++ b/buildscripts/iwyu/test/basic/b.cpp
@@ -0,0 +1,5 @@
+#include "a.h"
+
+type_b return_b_function() {
+ return type_b();
+}
diff --git a/buildscripts/iwyu/test/basic/b.h b/buildscripts/iwyu/test/basic/b.h
new file mode 100644
index 00000000000..422d7626e90
--- /dev/null
+++ b/buildscripts/iwyu/test/basic/b.h
@@ -0,0 +1 @@
+class type_b {};
diff --git a/buildscripts/iwyu/test/basic/expected_results.py b/buildscripts/iwyu/test/basic/expected_results.py
new file mode 100644
index 00000000000..98ed60ea4fb
--- /dev/null
+++ b/buildscripts/iwyu/test/basic/expected_results.py
@@ -0,0 +1,17 @@
+import os
+import sys
+
+# Expected contents of b.cpp once the analysis script has been run on this test case.
+EXPECTED_B_CPP = """
+#include "b.h"
+
+type_b return_b_function() {
+ return type_b();
+}
+"""
+
+# Compare b.cpp (relative to the current working directory) with the expectation; on a
+# mismatch print both versions and exit with status 1 so the test runner sees a failure.
+with open('b.cpp') as f:
+ content = f.read()
+ if content != EXPECTED_B_CPP:
+ print(f'Actual:\n"""{content}"""')
+ print(f'Expected:\n"""{EXPECTED_B_CPP}"""')
+ sys.exit(1)
diff --git a/buildscripts/iwyu/test/basic/test_config.yml b/buildscripts/iwyu/test/basic/test_config.yml
new file mode 100644
index 00000000000..a5b906f5558
--- /dev/null
+++ b/buildscripts/iwyu/test/basic/test_config.yml
@@ -0,0 +1,25 @@
+# options passed to IWYU
+iwyu_options:
+ - '--max_line_length=100'
+ - '--no_fwd_decls'
+ - '--prefix_header_includes=add'
+ - '--transitive_includes_only'
+
+# options passed to the fix script
+fix_options:
+ - '--blank_lines'
+ - '--nocomments'
+ - '--noreorder'
+ - '--safe_headers'
+
+# filename regex to swap no_include in place
+# quotes and brackets are not included; quotes are always assumed
+# since this is targeting IWYU added headers
+no_includes:
+
+# prefixes (non regex) to skip
+skip_files:
+
+# regex file paths to add keep pragma
+# include the quotes or angle brackets
+keep_includes:
diff --git a/buildscripts/iwyu/test/no_include/a.h b/buildscripts/iwyu/test/no_include/a.h
new file mode 100644
index 00000000000..ad792ace34b
--- /dev/null
+++ b/buildscripts/iwyu/test/no_include/a.h
@@ -0,0 +1 @@
+#include "b.h"
diff --git a/buildscripts/iwyu/test/no_include/b.cpp b/buildscripts/iwyu/test/no_include/b.cpp
new file mode 100644
index 00000000000..dcbc8627764
--- /dev/null
+++ b/buildscripts/iwyu/test/no_include/b.cpp
@@ -0,0 +1,5 @@
+#include "a.h"
+
+type_b return_b_function() {
+ return type_b();
+}
diff --git a/buildscripts/iwyu/test/no_include/b.h b/buildscripts/iwyu/test/no_include/b.h
new file mode 100644
index 00000000000..422d7626e90
--- /dev/null
+++ b/buildscripts/iwyu/test/no_include/b.h
@@ -0,0 +1 @@
+class type_b {};
diff --git a/buildscripts/iwyu/test/no_include/expected_results.py b/buildscripts/iwyu/test/no_include/expected_results.py
new file mode 100644
index 00000000000..90bda7e15a4
--- /dev/null
+++ b/buildscripts/iwyu/test/no_include/expected_results.py
@@ -0,0 +1,18 @@
+import os
+import sys
+
+# Expected contents of b.cpp once the analysis script has been run on this test case:
+# a no_include pragma for "b.h" and a keep pragma on the existing "a.h" include.
+EXPECTED_B_CPP = """// IWYU pragma: no_include "b.h"
+
+#include "a.h" // IWYU pragma: keep
+
+type_b return_b_function() {
+ return type_b();
+}
+"""
+
+# Compare b.cpp (relative to the current working directory) with the expectation; on a
+# mismatch print both versions and exit with status 1 so the test runner sees a failure.
+with open('b.cpp') as f:
+ content = f.read()
+ if content != EXPECTED_B_CPP:
+ print(f'Actual:\n"""{content}"""')
+ print(f'Expected:\n"""{EXPECTED_B_CPP}"""')
+ sys.exit(1)
diff --git a/buildscripts/iwyu/test/no_include/test_config.yml b/buildscripts/iwyu/test/no_include/test_config.yml
new file mode 100644
index 00000000000..e441f5bac35
--- /dev/null
+++ b/buildscripts/iwyu/test/no_include/test_config.yml
@@ -0,0 +1,27 @@
+# options passed to IWYU
+iwyu_options:
+ - '--max_line_length=100'
+ - '--no_fwd_decls'
+ - '--prefix_header_includes=add'
+ - '--transitive_includes_only'
+
+# options passed to the fix script
+fix_options:
+ - '--blank_lines'
+ - '--nocomments'
+ - '--noreorder'
+ - '--safe_headers'
+
+# filename regex to swap no_include in place
+# quotes and brackets are not included; quotes are always assumed
+# since this is targeting IWYU added headers
+no_includes:
+ - 'b.h'
+
+# prefixes (non regex) to skip
+skip_files:
+
+# regex file paths to add keep pragma
+# include the quotes or angle brackets
+keep_includes:
+- '"a.h"'
diff --git a/buildscripts/iwyu/test/run_tests.py b/buildscripts/iwyu/test/run_tests.py
new file mode 100644
index 00000000000..d0e32f00a8d
--- /dev/null
+++ b/buildscripts/iwyu/test/run_tests.py
@@ -0,0 +1,97 @@
+import pathlib
+import yaml
+import json
+import shutil
+import os
+import glob
+import subprocess
+import sys
+import argparse
+import concurrent.futures
+
+# Command line: only the toolchain bin directory used to build the compile commands.
+parser = argparse.ArgumentParser(description='Run tests for the IWYU analysis script.')
+
+parser.add_argument('--mongo-toolchain-bin-dir', type=str,
+                    help='Which toolchain bin directory to use for this analysis.',
+                    default='/opt/mongodbtoolchain/v4/bin')
+
+args = parser.parse_args()
+
+# The tests must run from the directory holding this script. Both sides of the comparison
+# are Path objects: comparing the str from os.getcwd() with a Path is never equal, which
+# made the script announce and perform the directory change on every run.
+tests_dir = pathlib.Path(__file__).parent.resolve()
+if pathlib.Path.cwd() != tests_dir:
+    print(f"iwyu test script must run in the tests directory, changing dirs to {tests_dir}")
+    os.chdir(tests_dir)
+
+# the analysis script under test lives one directory above the tests
+analysis_script = pathlib.Path(__file__).parent.parent / 'run_iwyu_analysis.py'
+
+
+def run_test(entry):
+    """
+    Run one test case directory through the analysis script and check the outcome.
+
+    The test case is copied into a fresh 'test_run' subdirectory, a compile_commands.json
+    and a placeholder etc/iwyu_mapping.imp are generated there, the analysis script is run
+    with the case's test_config.yml, and finally the case's expected_results.py decides
+    whether the test passed.
+
+    Returns a tuple of (exit status of expected_results.py, log message, test name).
+    """
+    entry_path = pathlib.Path(entry)
+    test_name = entry_path.name
+    print(f"Running test {entry_path}...")
+
+    # always start from a clean copy of the test case
+    test_dir = entry_path / 'test_run'
+    if os.path.exists(test_dir):
+        shutil.rmtree(test_dir)
+
+    shutil.copytree(entry_path, test_dir)
+
+    # one compile command per C++ source file found in the copy
+    compile_commands = []
+    for source_file in glob.glob('**/*.cpp', root_dir=test_dir, recursive=True):
+        output = os.path.splitext(source_file)[0] + '.o'
+        compile_commands.append({
+            'file': source_file,
+            'command': f"{args.mongo_toolchain_bin_dir}/clang++ -o {output} -c {source_file}",
+            "directory": os.path.abspath(test_dir),
+            "output": output,
+        })
+
+    with open(test_dir / 'compile_commands.json', 'w') as compdb:
+        json.dump(compile_commands, compdb)
+
+    # write a placeholder mapping file at etc/iwyu_mapping.imp inside the test run
+    mapping_dir = test_dir / 'etc'
+    os.makedirs(mapping_dir, exist_ok=True)
+    with open(mapping_dir / 'iwyu_mapping.imp', 'w') as mapping:
+        mapping.write(
+            '[{include: ["\\"placeholder.h\\"", "private", "\\"placeholder2.h\\"", "public"]}]')
+
+    analysis_cmd = [sys.executable, analysis_script, '--verbose', '--config-file=test_config.yml']
+    iwyu_run = subprocess.run(analysis_cmd, text=True, stdout=subprocess.PIPE,
+                              stderr=subprocess.STDOUT, cwd=test_dir)
+
+    results_cmd = [sys.executable, entry_path / 'expected_results.py']
+    results_run = subprocess.run(results_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                 text=True, cwd=test_dir)
+
+    status = results_run.returncode
+    if status == 0:
+        return status, f"[{test_name}] PASSED!: {entry_path}", test_name
+
+    # on failure return both logs, every line prefixed with the test name
+    log_text = '\n'.join([iwyu_run.stdout, results_run.stdout, f"FAILED!: {entry_path}"])
+    prefixed = [f"[{test_name}] {line}" for line in log_text.split('\n')]
+    return status, '\n'.join(prefixed), test_name
+
+
+# Run every test case directory next to this script in parallel and collect the failures.
+failed_tests = []
+with concurrent.futures.ThreadPoolExecutor(
+        max_workers=len(os.sched_getaffinity(0)) + 4) as executor:
+
+    # create and run one job per test case directory
+    future_cmd = {
+        executor.submit(run_test, entry): entry
+        for entry in pathlib.Path(__file__).parent.glob('*') if os.path.isdir(entry)
+    }
+
+    # process the results as they complete
+    for future in concurrent.futures.as_completed(future_cmd):
+        result, message, test_name = future.result()
+        if result != 0:
+            failed_tests += [test_name]
+        print(message)
+
+print("\n***Tests complete.***")
+if failed_tests:
+    print("The following tests failed:")
+    for test in failed_tests:
+        print(' - ' + test)
+    print("Please review the logs above for more information.")
+    # report the failure through the exit status; otherwise the process exits 0 and a
+    # caller such as a CI task cannot tell that any test failed
+    sys.exit(1)
diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml
index 1d5fe4ff950..cc62dc05f7a 100644
--- a/etc/evergreen_yml_components/definitions.yml
+++ b/etc/evergreen_yml_components/definitions.yml
@@ -2797,6 +2797,16 @@ tasks:
targets:
install-core
+- name: iwyu_self_test
+ tags: []
+ commands:
+ - command: subprocess.exec
+ params:
+ binary: bash
+ args:
+ - "src/evergreen/run_python_script.sh"
+ - "buildscripts/iwyu/test/run_tests.py"
+
- name: libdeps_graph_linting
tags: []
depends_on:
@@ -8171,6 +8181,11 @@ task_groups:
- libdeps_graph_linting
- <<: *compile_task_group_template
+ name: iwyu_self_test_TG
+ tasks:
+ - iwyu_self_test
+
+- <<: *compile_task_group_template
name: compile_ninja_TG
tasks:
- compile_ninja
diff --git a/etc/evergreen_yml_components/variants/compile_static_analysis.yml b/etc/evergreen_yml_components/variants/compile_static_analysis.yml
index aa9977f2dae..3ab4426b699 100644
--- a/etc/evergreen_yml_components/variants/compile_static_analysis.yml
+++ b/etc/evergreen_yml_components/variants/compile_static_analysis.yml
@@ -219,6 +219,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
- name: compile_integration_and_test_parallel_stream_TG
- name: generate_buildid_to_debug_symbols_mapping
+ - name: iwyu_self_test_TG
- name: .lint
- name: resmoke_validation_tests
- name: server_discovery_and_monitoring_json_test_TG
diff --git a/etc/iwyu_mapping.imp b/etc/iwyu_mapping.imp
new file mode 100644
index 00000000000..8b4837c6279
--- /dev/null
+++ b/etc/iwyu_mapping.imp
@@ -0,0 +1,16 @@
+[
+ {include: ["<boost/smart_ptr/detail/operator_bool.hpp>", "private", "<boost/smart_ptr.hpp>", "public"]},
+ {include: ["\"boost/smart_ptr/detail/operator_bool.hpp\"", "private", "<boost/smart_ptr.hpp>", "public"]},
+ {include: ["<boost/optional/detail/optional_relops.hpp>", "private", "<boost/optional.hpp>", "public"]},
+ {include: ["\"boost/optional/detail/optional_relops.hpp\"", "private", "<boost/optional.hpp>", "public"]},
+ {include: ["<boost/optional/detail/optional_reference_spec.hpp>", "private", "<boost/optional.hpp>", "public"]},
+ {include: ["\"boost/optional/detail/optional_reference_spec.hpp\"", "private", "<boost/optional.hpp>", "public"]},
+ {include: ["<boost/tuple/detail/tuple_basic.hpp>", "private", "<boost/tuple/tuple.hpp>", "public"]},
+ {include: ["\"boost/tuple/detail/tuple_basic.hpp\"", "private", "<boost/tuple/tuple.hpp>", "public"]},
+ {include: ["<boost/program_options/detail/value_semantic.hpp>", "private", "<boost/program_options/value_semantic.hpp>", "public"]},
+ {include: ["\"boost/program_options/detail/value_semantic.hpp\"", "private", "<boost/program_options/value_semantic.hpp>", "public"]},
+ {include: ["<boost/optional/detail/optional_swap.hpp>", "private", "<boost/optional.hpp>", "public"]},
+ {include: ["\"boost/optional/detail/optional_swap.hpp\"", "private", "<boost/optional.hpp>", "public"]},
+ {include: ["<boost/preprocessor/iteration/detail/iter/limits/forward1_256.hpp>", "private", "<boost/preprocessor/iteration/iterate.hpp>", "public"]},
+ {include: ["\"boost/preprocessor/iteration/detail/iter/limits/forward1_256.hpp\"", "private", "<boost/preprocessor/iteration/iterate.hpp>", "public"]},
+]
diff --git a/etc/pip/components/lint.req b/etc/pip/components/lint.req
index 08ba18c9d00..9d7ba16ed21 100644
--- a/etc/pip/components/lint.req
+++ b/etc/pip/components/lint.req
@@ -10,3 +10,5 @@ yapf == 0.26.0
evergreen-lint == 0.1.3
types-setuptools == 57.4.12
types-requests == 2.26.3
+tqdm
+colorama
diff --git a/site_scons/site_tools/compilation_db.py b/site_scons/site_tools/compilation_db.py
index dc21a334c2f..ada1e49534a 100644
--- a/site_scons/site_tools/compilation_db.py
+++ b/site_scons/site_tools/compilation_db.py
@@ -23,6 +23,7 @@
import json
import SCons
import itertools
+import shlex
# Implements the ability for SCons to emit a compilation database for the MongoDB project. See
# http://clang.llvm.org/docs/JSONCompilationDatabase.html for details on what a compilation
@@ -154,6 +155,7 @@ def CompilationDbEntryAction(target, source, env, **kw):
"directory": env.Dir("#").abspath,
"command": ' '.join(cmd_list),
"file": str(env["__COMPILATIONDB_USOURCE"][0]),
+ "output": shlex.quote(' '.join([str(t) for t in env["__COMPILATIONDB_UTARGET"]])),
}
target[0].write(entry)
diff --git a/src/mongo/bson/mutable/mutable_bson_test_utils.h b/src/mongo/bson/mutable/mutable_bson_test_utils.h
index c7223131fd2..317e7666ef6 100644
--- a/src/mongo/bson/mutable/mutable_bson_test_utils.h
+++ b/src/mongo/bson/mutable/mutable_bson_test_utils.h
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#pragma once
+
#include <iosfwd>
#include "mongo/bson/mutable/document.h"
diff --git a/src/mongo/client/server_discovery_monitor.h b/src/mongo/client/server_discovery_monitor.h
index 93bcd4c53a8..c9e09d8006b 100644
--- a/src/mongo/client/server_discovery_monitor.h
+++ b/src/mongo/client/server_discovery_monitor.h
@@ -26,6 +26,9 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
+
+#pragma once
+
#include "mongo/client/mongo_uri.h"
#include "mongo/client/replica_set_monitor_stats.h"
#include "mongo/client/sdam/sdam.h"
diff --git a/src/mongo/db/commands/apply_ops_cmd.cpp b/src/mongo/db/commands/apply_ops_cmd.cpp
index 06d1258476f..ae8d9d52d6b 100644
--- a/src/mongo/db/commands/apply_ops_cmd.cpp
+++ b/src/mongo/db/commands/apply_ops_cmd.cpp
@@ -33,7 +33,7 @@
#include "mongo/bson/util/bson_check.h"
#include "mongo/bson/util/bson_extract.h"
-#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/authorization_session.h" // IWYU pragma: keep
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/document_validation.h"
#include "mongo/db/client.h"
diff --git a/src/mongo/db/database_name.h b/src/mongo/db/database_name.h
index bde0341dd59..5ebd8fbf15c 100644
--- a/src/mongo/db/database_name.h
+++ b/src/mongo/db/database_name.h
@@ -103,7 +103,7 @@ public:
};
#define DBNAME_CONSTANT(id, db) static const ConstantProxy id;
-#include "database_name_reserved.def.h"
+#include "database_name_reserved.def.h" // IWYU pragma: keep
#undef DBNAME_CONSTANT
static constexpr size_t kMaxDatabaseNameLength = 63;
@@ -317,14 +317,14 @@ private:
// namespace_string.h for more details.
namespace dbname_detail::const_proxy_shared_states {
#define DBNAME_CONSTANT(id, db) constexpr inline DatabaseName::ConstantProxy::SharedState id{db};
-#include "database_name_reserved.def.h"
+#include "database_name_reserved.def.h" // IWYU pragma: keep
#undef DBNAME_CONSTANT
} // namespace dbname_detail::const_proxy_shared_states
#define DBNAME_CONSTANT(id, db) \
constexpr inline DatabaseName::ConstantProxy DatabaseName::id{ \
&dbname_detail::const_proxy_shared_states::id};
-#include "database_name_reserved.def.h"
+#include "database_name_reserved.def.h" // IWYU pragma: keep
#undef DBNAME_CONSTANT
} // namespace mongo
diff --git a/src/mongo/db/exec/js_function.h b/src/mongo/db/exec/js_function.h
index 55f1343fb39..8977ee88def 100644
--- a/src/mongo/db/exec/js_function.h
+++ b/src/mongo/db/exec/js_function.h
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#pragma once
+
#include <memory>
#include <string>
diff --git a/src/mongo/db/fts/unicode/byte_vector.h b/src/mongo/db/fts/unicode/byte_vector.h
index 13886d16ff3..62dc7e61ba7 100644
--- a/src/mongo/db/fts/unicode/byte_vector.h
+++ b/src/mongo/db/fts/unicode/byte_vector.h
@@ -33,11 +33,11 @@
// TODO replace this with #if BOOST_HW_SIMD_X86 >= BOOST_HW_SIMD_X86_SSE2_VERSION in boost 1.60
#if defined(_M_AMD64) || defined(__amd64__)
-#include "mongo/db/fts/unicode/byte_vector_sse2.h"
+#include "mongo/db/fts/unicode/byte_vector_sse2.h" // IWYU pragma: export
#elif defined(__powerpc64__)
-#include "mongo/db/fts/unicode/byte_vector_altivec.h"
+#include "mongo/db/fts/unicode/byte_vector_altivec.h" // IWYU pragma: export
#elif defined(__aarch64__)
-#include "mongo/db/fts/unicode/byte_vector_neon.h"
-#else // Other platforms go above here.
+#include "mongo/db/fts/unicode/byte_vector_neon.h" // IWYU pragma: export
+#else // Other platforms go above here.
#undef MONGO_HAVE_FAST_BYTE_VECTOR
#endif
diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h
index 1582a7083d7..9a1d433a564 100644
--- a/src/mongo/db/namespace_string.h
+++ b/src/mongo/db/namespace_string.h
@@ -195,7 +195,7 @@ public:
// type is incomplete, they can't be _declared_ fully constexpr (a constexpr
// limitation).
#define NSS_CONSTANT(id, db, coll) static const ConstantProxy id;
-#include "namespace_string_reserved.def.h"
+#include "namespace_string_reserved.def.h" // IWYU pragma: keep
#undef NSS_CONSTANT
/**
@@ -1139,14 +1139,14 @@ inline bool NamespaceString::validCollectionName(StringData coll) {
namespace nss_detail::const_proxy_shared_states {
#define NSS_CONSTANT(id, db, coll) \
constexpr inline NamespaceString::ConstantProxy::SharedState id{db, coll};
-#include "namespace_string_reserved.def.h"
+#include "namespace_string_reserved.def.h" // IWYU pragma: keep
#undef NSS_CONSTANT
} // namespace nss_detail::const_proxy_shared_states
#define NSS_CONSTANT(id, db, coll) \
constexpr inline NamespaceString::ConstantProxy NamespaceString::id{ \
&nss_detail::const_proxy_shared_states::id};
-#include "namespace_string_reserved.def.h"
+#include "namespace_string_reserved.def.h" // IWYU pragma: keep
#undef NSS_CONSTANT
} // namespace mongo
diff --git a/src/mongo/db/operation_context_test.cpp b/src/mongo/db/operation_context_test.cpp
index 5eb91401598..2f1d4e183b3 100644
--- a/src/mongo/db/operation_context_test.cpp
+++ b/src/mongo/db/operation_context_test.cpp
@@ -43,7 +43,7 @@
#include "mongo/db/session/logical_session_id.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_debug.h"
-#include "mongo/stdx/future.h"
+#include "mongo/stdx/future.h" // IWYU pragma: keep
#include "mongo/stdx/thread.h"
#include "mongo/transport/session.h"
#include "mongo/transport/transport_layer_mock.h"
diff --git a/src/mongo/db/pipeline/document_source_facet.h b/src/mongo/db/pipeline/document_source_facet.h
index 344b3448ac9..9ec4da97c2e 100644
--- a/src/mongo/db/pipeline/document_source_facet.h
+++ b/src/mongo/db/pipeline/document_source_facet.h
@@ -38,6 +38,7 @@
#include "mongo/db/pipeline/lite_parsed_document_source.h"
#include "mongo/db/pipeline/lite_parsed_pipeline.h"
#include "mongo/db/pipeline/pipeline.h"
+#include "mongo/db/pipeline/tee_buffer.h"
namespace mongo {
diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp
index b5ae2df4865..991c0782d33 100644
--- a/src/mongo/db/pipeline/document_source_unwind.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind.cpp
@@ -46,53 +46,6 @@ using boost::intrusive_ptr;
using std::string;
using std::vector;
-/** Helper class to unwind array from a single document. */
-class DocumentSourceUnwind::Unwinder {
-public:
- Unwinder(const FieldPath& unwindPath,
- bool preserveNullAndEmptyArrays,
- const boost::optional<FieldPath>& indexPath,
- bool strict);
- /** Reset the unwinder to unwind a new document. */
- void resetDocument(const Document& document);
-
- /**
- * @return the next document unwound from the document provided to resetDocument(), using
- * the current value in the array located at the provided unwindPath.
- *
- * Returns boost::none if the array is exhausted.
- */
- DocumentSource::GetNextResult getNext();
-
-private:
- // Tracks whether or not we can possibly return any more documents. Note we may return
- // boost::none even if this is true.
- bool _haveNext = false;
-
- // Path to the array to unwind.
- const FieldPath _unwindPath;
-
- // Documents that have a nullish value, or an empty array for the field '_unwindPath', will pass
- // through the $unwind stage unmodified if '_preserveNullAndEmptyArrays' is true.
- const bool _preserveNullAndEmptyArrays;
-
- // If set, the $unwind stage will include the array index in the specified path, overwriting any
- // existing value, setting to null when the value was a non-array or empty array.
- const boost::optional<FieldPath> _indexPath;
- // Specifies if input to $unwind is required to be an array.
- const bool _strict;
-
- Value _inputArray;
-
- MutableDocument _output;
-
- // Document indexes of the field path components.
- vector<Position> _unwindPathFieldIndexes;
-
- // Index into the _inputArray to return next.
- size_t _index = 0;
-};
-
DocumentSourceUnwind::Unwinder::Unwinder(const FieldPath& unwindPath,
bool preserveNullAndEmptyArrays,
const boost::optional<FieldPath>& indexPath,
diff --git a/src/mongo/db/pipeline/document_source_unwind.h b/src/mongo/db/pipeline/document_source_unwind.h
index f2b97085d33..00d4de32588 100644
--- a/src/mongo/db/pipeline/document_source_unwind.h
+++ b/src/mongo/db/pipeline/document_source_unwind.h
@@ -138,4 +138,51 @@ private:
boost::optional<long long> _smallestLimitPushedDown;
};
+/** Helper class to unwind array from a single document. */
+class DocumentSourceUnwind::Unwinder {
+public:
+ Unwinder(const FieldPath& unwindPath,
+ bool preserveNullAndEmptyArrays,
+ const boost::optional<FieldPath>& indexPath,
+ bool strict);
+ /** Reset the unwinder to unwind a new document. */
+ void resetDocument(const Document& document);
+
+ /**
+ * @return the next document unwound from the document provided to resetDocument(), using
+ * the current value in the array located at the provided unwindPath.
+ *
+ * Returns boost::none if the array is exhausted.
+ */
+ DocumentSource::GetNextResult getNext();
+
+private:
+ // Tracks whether or not we can possibly return any more documents. Note we may return
+ // boost::none even if this is true.
+ bool _haveNext = false;
+
+ // Path to the array to unwind.
+ const FieldPath _unwindPath;
+
+ // Documents that have a nullish value, or an empty array for the field '_unwindPath', will pass
+ // through the $unwind stage unmodified if '_preserveNullAndEmptyArrays' is true.
+ const bool _preserveNullAndEmptyArrays;
+
+ // If set, the $unwind stage will include the array index in the specified path, overwriting any
+ // existing value, setting to null when the value was a non-array or empty array.
+ const boost::optional<FieldPath> _indexPath;
+ // Specifies if input to $unwind is required to be an array.
+ const bool _strict;
+
+ Value _inputArray;
+
+ MutableDocument _output;
+
+ // Document indexes of the field path components.
+ std::vector<Position> _unwindPathFieldIndexes;
+
+ // Index into the _inputArray to return next.
+ size_t _index = 0;
+};
+
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_unwind_test.cpp b/src/mongo/db/pipeline/document_source_unwind_test.cpp
index d477873c98a..2cedc3f0cd6 100644
--- a/src/mongo/db/pipeline/document_source_unwind_test.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind_test.cpp
@@ -29,12 +29,15 @@
#include "mongo/platform/basic.h"
+#include "mongo/db/pipeline/document_source_unwind.h"
+
#include <boost/intrusive_ptr.hpp>
#include <deque>
#include <memory>
#include <string>
#include <vector>
+
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/json.h"
@@ -44,7 +47,6 @@
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/pipeline/dependencies.h"
#include "mongo/db/pipeline/document_source_mock.h"
-#include "mongo/db/pipeline/document_source_unwind.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/query/query_test_service_context.h"
#include "mongo/db/service_context.h"
diff --git a/src/mongo/db/pipeline/inner_pipeline_stage_impl.h b/src/mongo/db/pipeline/inner_pipeline_stage_impl.h
index 4f5cafcddd6..2264889cda9 100644
--- a/src/mongo/db/pipeline/inner_pipeline_stage_impl.h
+++ b/src/mongo/db/pipeline/inner_pipeline_stage_impl.h
@@ -34,6 +34,7 @@
#include <boost/intrusive_ptr.hpp>
#include <boost/optional.hpp>
+#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/inner_pipeline_stage_interface.h"
namespace mongo {
diff --git a/src/mongo/db/pipeline/partition_key_comparator.h b/src/mongo/db/pipeline/partition_key_comparator.h
index befc9839a55..ec3c7dd0077 100644
--- a/src/mongo/db/pipeline/partition_key_comparator.h
+++ b/src/mongo/db/pipeline/partition_key_comparator.h
@@ -26,6 +26,7 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
+#pragma once
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/pipeline/memory_usage_tracker.h"
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index fa165639a0b..a7b06cd98d9 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -71,7 +71,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
-
namespace mongo {
namespace log_detail {
void logSubplannerIndexEntry(const IndexEntry& entry, size_t childIndex) {
@@ -827,7 +826,7 @@ StatusWith<std::unique_ptr<PlanCacheIndexTree>> QueryPlanner::cacheDataFromTagge
// static
Status QueryPlanner::tagAccordingToCache(MatchExpression* filter,
const PlanCacheIndexTree* const indexTree,
- const map<IndexEntry::Identifier, size_t>& indexMap) {
+ const std::map<IndexEntry::Identifier, size_t>& indexMap) {
if (nullptr == filter) {
return Status(ErrorCodes::NoQueryExecutionPlans, "Cannot tag tree: filter is NULL.");
}
@@ -946,7 +945,7 @@ StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::planFromCache(
QueryPlannerIXSelect::expandIndexes(fields, params.indices, false /* indexHinted */);
// Map from index name to index number.
- map<IndexEntry::Identifier, size_t> indexMap;
+ std::map<IndexEntry::Identifier, size_t> indexMap;
for (size_t i = 0; i < expandedIndexes.size(); ++i) {
const IndexEntry& ie = expandedIndexes[i];
const auto insertionRes = indexMap.insert(std::make_pair(ie.identifier, i));
diff --git a/src/mongo/db/repl/apply_ops_command_info.h b/src/mongo/db/repl/apply_ops_command_info.h
index 22348ba4f8a..bb90078b00f 100644
--- a/src/mongo/db/repl/apply_ops_command_info.h
+++ b/src/mongo/db/repl/apply_ops_command_info.h
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#pragma once
+
#include <vector>
#include "mongo/base/status.h"
diff --git a/src/mongo/db/repl/storage_timestamp_test.cpp b/src/mongo/db/repl/storage_timestamp_test.cpp
index f6449a77f41..76ee5586e2e 100644
--- a/src/mongo/db/repl/storage_timestamp_test.cpp
+++ b/src/mongo/db/repl/storage_timestamp_test.cpp
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#include <fstream> // IWYU pragma: keep
+
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/mutable/algorithm.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
@@ -90,7 +92,7 @@
#include "mongo/dbtests/dbtests.h"
#include "mongo/logv2/log.h"
#include "mongo/rpc/get_status_from_command_result.h"
-#include "mongo/stdx/future.h"
+#include "mongo/stdx/future.h" // IWYU pragma: keep
#include "mongo/unittest/unittest.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/stacktrace.h"
diff --git a/src/mongo/db/s/resharding/resharding_data_replication.h b/src/mongo/db/s/resharding/resharding_data_replication.h
index 8d6659b65e5..3672229de9f 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication.h
+++ b/src/mongo/db/s/resharding/resharding_data_replication.h
@@ -36,7 +36,11 @@
#include "mongo/bson/timestamp.h"
#include "mongo/db/cancelable_operation_context.h"
#include "mongo/db/s/resharding/donor_oplog_id_gen.h"
+#include "mongo/db/s/resharding/resharding_collection_cloner.h"
+#include "mongo/db/s/resharding/resharding_oplog_applier.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_metrics.h"
+#include "mongo/db/s/resharding/resharding_oplog_fetcher.h"
+#include "mongo/db/s/resharding/resharding_txn_cloner.h"
#include "mongo/db/shard_id.h"
#include "mongo/s/chunk_manager.h"
#include "mongo/s/resharding/common_types_gen.h"
diff --git a/src/mongo/db/s/resharding/resharding_data_replication_test.cpp b/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
index 5eabc1a3787..2f7002b5994 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#include "mongo/db/s/resharding/resharding_data_replication.h"
+
#include "mongo/bson/bsonmisc.h"
#include "mongo/db/catalog/collection_write_path.h"
#include "mongo/db/persistent_task_store.h"
@@ -34,7 +36,6 @@
#include "mongo/db/query/collation/collator_interface_mock.h"
#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
-#include "mongo/db/s/resharding/resharding_data_replication.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_progress_gen.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/service_context_d_test_fixture.h"
diff --git a/src/mongo/db/serverless/shard_split_donor_service_test.cpp b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
index 2cf6bc538fa..ee330e6f65b 100644
--- a/src/mongo/db/serverless/shard_split_donor_service_test.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
@@ -167,25 +167,6 @@ private:
BSONObj _msg;
} mockReplSetReconfigCmd;
-namespace {
-sdam::TopologyDescriptionPtr makeRecipientTopologyDescription(const MockReplicaSet& set) {
- std::shared_ptr<TopologyDescription> topologyDescription =
- std::make_shared<sdam::TopologyDescription>(sdam::SdamConfiguration(
- set.getHosts(), sdam::TopologyType::kReplicaSetNoPrimary, set.getSetName()));
-
- for (auto& server : set.getHosts()) {
- auto serverDescription = sdam::ServerDescriptionBuilder()
- .withAddress(server)
- .withSetName(set.getSetName())
- .instance();
- topologyDescription->installServerDescription(serverDescription);
- }
-
- return topologyDescription;
-}
-
-} // namespace
-
std::ostream& operator<<(std::ostream& builder, mongo::ShardSplitDonorStateEnum state) {
switch (state) {
case mongo::ShardSplitDonorStateEnum::kUninitialized:
diff --git a/src/mongo/db/sorter/sorter_test.cpp b/src/mongo/db/sorter/sorter_test.cpp
index 1aeb4a809c6..defcf6ca37d 100644
--- a/src/mongo/db/sorter/sorter_test.cpp
+++ b/src/mongo/db/sorter/sorter_test.cpp
@@ -41,7 +41,7 @@
#include "mongo/db/sorter/sorter.h"
#include "mongo/logv2/log.h"
#include "mongo/platform/random.h"
-#include "mongo/stdx/thread.h"
+#include "mongo/stdx/thread.h" // IWYU pragma: keep
#include "mongo/unittest/death_test.h"
#include "mongo/unittest/temp_dir.h"
#include "mongo/unittest/unittest.h"
diff --git a/src/mongo/db/traffic_reader_main.cpp b/src/mongo/db/traffic_reader_main.cpp
index cf94cf89dcb..dfd668f34fb 100644
--- a/src/mongo/db/traffic_reader_main.cpp
+++ b/src/mongo/db/traffic_reader_main.cpp
@@ -45,7 +45,7 @@
#include "mongo/util/text.h"
#include <boost/filesystem.hpp>
-#include <boost/program_options.hpp>
+#include <boost/program_options.hpp> // IWYU pragma: keep
using namespace mongo;
diff --git a/src/mongo/db/update/bit_node_test.cpp b/src/mongo/db/update/bit_node_test.cpp
index 0e218ebc29e..ac2444e6963 100644
--- a/src/mongo/db/update/bit_node_test.cpp
+++ b/src/mongo/db/update/bit_node_test.cpp
@@ -32,7 +32,7 @@
#include "mongo/db/update/bit_node.h"
#include "mongo/bson/mutable/algorithm.h"
-#include "mongo/bson/mutable/mutable_bson_test_utils.h"
+#include "mongo/bson/mutable/mutable_bson_test_utils.h" // IWYU pragma: keep
#include "mongo/db/json.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/update/update_node_test_fixture.h"
diff --git a/src/mongo/db/update/update_node_test_fixture.h b/src/mongo/db/update/update_node_test_fixture.h
index 513195c2caa..e5e84da0d61 100644
--- a/src/mongo/db/update/update_node_test_fixture.h
+++ b/src/mongo/db/update/update_node_test_fixture.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/bson/json.h"
#include "mongo/db/concurrency/locker_noop_service_context_test_fixture.h"
#include "mongo/db/service_context.h"
#include "mongo/db/update/document_diff_calculator.h"
diff --git a/src/mongo/executor/pinned_connection_task_executor.cpp b/src/mongo/executor/pinned_connection_task_executor.cpp
index eb3083b81d7..0d6d7570b72 100644
--- a/src/mongo/executor/pinned_connection_task_executor.cpp
+++ b/src/mongo/executor/pinned_connection_task_executor.cpp
@@ -30,7 +30,7 @@
#include "pinned_connection_task_executor.h"
#include "mongo/executor/network_interface.h"
#include "mongo/executor/thread_pool_task_executor.h"
-#include "mongo/util/scoped_unlock.h"
+#include "mongo/util/scoped_unlock.h" // IWYU pragma: keep
namespace mongo::executor {
/**
diff --git a/src/mongo/idl/idl_test.h b/src/mongo/idl/idl_test.h
index 9720ed2ab7a..39676da6b88 100644
--- a/src/mongo/idl/idl_test.h
+++ b/src/mongo/idl/idl_test.h
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#pragma once
+
#include <string>
#include <vector>
diff --git a/src/mongo/idl/server_parameter_test_util.h b/src/mongo/idl/server_parameter_test_util.h
index 0a61033d22c..0d45d2a3000 100644
--- a/src/mongo/idl/server_parameter_test_util.h
+++ b/src/mongo/idl/server_parameter_test_util.h
@@ -26,6 +26,7 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
+#pragma once
#include <string>
diff --git a/src/mongo/logv2/log_options.h b/src/mongo/logv2/log_options.h
index d0a4316e3da..6f458927823 100644
--- a/src/mongo/logv2/log_options.h
+++ b/src/mongo/logv2/log_options.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/logv2/constants.h"
#include "mongo/logv2/log_component.h"
#include "mongo/logv2/log_manager.h"
#include "mongo/logv2/log_tag.h"
diff --git a/src/mongo/logv2/uassert_sink.h b/src/mongo/logv2/uassert_sink.h
index f0fe4255708..c4ad7b8069c 100644
--- a/src/mongo/logv2/uassert_sink.h
+++ b/src/mongo/logv2/uassert_sink.h
@@ -35,6 +35,7 @@
#include <boost/log/sinks.hpp>
#include "mongo/logv2/bson_formatter.h"
+#include "mongo/logv2/plain_formatter.h"
#include "mongo/util/assert_util.h"
namespace mongo::logv2 {
diff --git a/src/mongo/platform/process_id.cpp b/src/mongo/platform/process_id.cpp
index eff158ca41e..b4b404e0ff4 100644
--- a/src/mongo/platform/process_id.cpp
+++ b/src/mongo/platform/process_id.cpp
@@ -46,7 +46,7 @@
#include <iostream>
#include <limits>
-#include <sstream>
+#include <sstream> // IWYU pragma: keep
#include "mongo/base/static_assert.h"
#include "mongo/util/assert_util.h"
diff --git a/src/mongo/platform/visibility_test_lib1.h b/src/mongo/platform/visibility_test_lib1.h
index d35ff10e4e6..4b01683258c 100644
--- a/src/mongo/platform/visibility_test_lib1.h
+++ b/src/mongo/platform/visibility_test_lib1.h
@@ -26,6 +26,7 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
+#pragma once
#include <string>
#include <string_view>
diff --git a/src/mongo/scripting/dbdirectclient_factory.h b/src/mongo/scripting/dbdirectclient_factory.h
index eed8ec5dcf5..1393cc4ca08 100644
--- a/src/mongo/scripting/dbdirectclient_factory.h
+++ b/src/mongo/scripting/dbdirectclient_factory.h
@@ -32,9 +32,10 @@
#include <functional>
#include <memory>
+#include "mongo/client/dbclient_base.h"
+
namespace mongo {
-class DBClientBase;
class OperationContext;
class ServiceContext;
diff --git a/src/mongo/unittest/assert.h b/src/mongo/unittest/assert.h
index c70ccc644ad..62e71077982 100644
--- a/src/mongo/unittest/assert.h
+++ b/src/mongo/unittest/assert.h
@@ -45,6 +45,7 @@
#include "mongo/base/status_with.h"
#include "mongo/base/string_data.h"
+#include "mongo/bson/mutable/mutable_bson_test_utils.h"
#include "mongo/logv2/log_debug.h"
#include "mongo/logv2/log_detail.h"
#include "mongo/unittest/bson_test_util.h"
diff --git a/src/mongo/unittest/inline_auto_update.h b/src/mongo/unittest/inline_auto_update.h
index 535b9cef8e4..7121424f8c6 100644
--- a/src/mongo/unittest/inline_auto_update.h
+++ b/src/mongo/unittest/inline_auto_update.h
@@ -32,6 +32,8 @@
#include <ostream>
#include <vector>
+#include "mongo/unittest/assert.h"
+
namespace mongo::unittest {
/**
* Computes a difference between the expected and actual formatted output and outputs it to the
diff --git a/src/mongo/util/assert_util.h b/src/mongo/util/assert_util.h
index a18ce99569f..c0bbc20c20c 100644
--- a/src/mongo/util/assert_util.h
+++ b/src/mongo/util/assert_util.h
@@ -37,7 +37,7 @@
#include "mongo/base/status_with.h"
#include "mongo/platform/compiler.h"
#include "mongo/platform/source_location.h"
-#include "mongo/util/assert_util_core.h"
+#include "mongo/util/assert_util_core.h" // IWYU pragma: export
#include "mongo/util/concurrency/thread_name.h"
#include "mongo/util/debug_util.h"
#include "mongo/util/exit_code.h"
diff --git a/src/mongo/util/log_with_sampling.h b/src/mongo/util/log_with_sampling.h
index e3407c26e07..a02551d243a 100644
--- a/src/mongo/util/log_with_sampling.h
+++ b/src/mongo/util/log_with_sampling.h
@@ -31,6 +31,7 @@
#include <utility>
+#include "mongo/logv2/log.h"
#include "mongo/logv2/log_component.h"
#include "mongo/logv2/log_severity.h"
diff --git a/src/mongo/util/quick_exit.cpp b/src/mongo/util/quick_exit.cpp
index 2b1e31814e1..34da3a1cbc8 100644
--- a/src/mongo/util/quick_exit.cpp
+++ b/src/mongo/util/quick_exit.cpp
@@ -43,7 +43,7 @@
#include <cstdlib>
// NOTE: Header only dependencies are OK in this library.
-#include "mongo/stdx/mutex.h"
+#include "mongo/stdx/mutex.h" // IWYU pragma: keep
#if !defined(__has_feature)
#define __has_feature(x) 0
diff --git a/src/mongo/util/represent_as.h b/src/mongo/util/represent_as.h
index 24468809ba8..3ea78a2e886 100644
--- a/src/mongo/util/represent_as.h
+++ b/src/mongo/util/represent_as.h
@@ -37,6 +37,7 @@
#include <boost/optional.hpp>
#include "mongo/base/static_assert.h"
+#include "mongo/platform/decimal128.h"
#include "mongo/stdx/type_traits.h"
namespace mongo {
diff --git a/src/mongo/util/text.cpp b/src/mongo/util/text.cpp
index b3cab5ca4ff..24ecc62baf1 100644
--- a/src/mongo/util/text.cpp
+++ b/src/mongo/util/text.cpp
@@ -35,7 +35,7 @@
#include <cerrno>
#include <iostream>
#include <memory>
-#include <sstream>
+#include <sstream> // IWYU pragma: keep
#ifdef _WIN32
#include <io.h>
diff --git a/src/mongo/watchdog/watchdog.cpp b/src/mongo/watchdog/watchdog.cpp
index 83c6fc70317..7e0abd5e4a5 100644
--- a/src/mongo/watchdog/watchdog.cpp
+++ b/src/mongo/watchdog/watchdog.cpp
@@ -32,7 +32,7 @@
#include "mongo/watchdog/watchdog.h"
-#include <boost/align.hpp>
+#include <boost/align.hpp> // IWYU pragma: keep
#include <boost/filesystem.hpp>
#ifndef _WIN32