Revert "SERVER-67651 replace clang tidy with clang tidy condensed"

This reverts commit 43e94464c25d98bb5ffb8c83af83bc959e343d29.
author: auto-revert-processor <dev-prod-dag@mongodb.com> 2022-09-24 03:59:14 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-09-24 04:28:59 +0000
commit: b463bf5448847019639ff76a70e658dd6837bda7 (patch)
tree: 05cc3e5ca383c9499364283a004e57893b94a937 /buildscripts
parent: 05cda545eb5d1911753cbd3c8004aac9308d890c (diff)
download: mongo-b463bf5448847019639ff76a70e658dd6837bda7.tar.gz
4 files changed, 213 insertions, 182 deletions
diff --git a/buildscripts/clang_tidy.py b/buildscripts/clang_tidy.py
deleted file mode 100755
index 60631b5e9c7..00000000000
--- a/buildscripts/clang_tidy.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/usr/bin/env python3
-"""Runs clang-tidy in parallel and combines the the results for easier viewing."""
-
-import argparse
-import datetime
-import json
-import os
-import subprocess
-import sys
-import locale
-import time
-from typing import Any, Dict, List, Optional, Tuple
-import multiprocessing
-from pathlib import Path
-from concurrent import futures
-from simple_report import Result, Report, put_report, try_combine_reports, make_report
-import yaml
-
-
-def _clang_tidy_executor(clang_tidy_filename: str, clang_tidy_binary: str,
-                         clang_tidy_cfg: Dict[str, Any], output_dir: str,
-                         show_stdout: bool) -> Tuple[str, Optional[str]]:
-
-    clang_tidy_parent_dir = output_dir / clang_tidy_filename.parent
-    os.makedirs(clang_tidy_parent_dir, exist_ok=True)
-
-    output_filename_base = clang_tidy_parent_dir / clang_tidy_filename.name
-    output_filename_fixes = output_filename_base.with_suffix(".yml")
-    clang_tidy_command = [
-        clang_tidy_binary, clang_tidy_filename, f"-export-fixes={output_filename_fixes}",
-        f"-config={json.dumps(clang_tidy_cfg)}"
-    ]
-    proc = subprocess.run(clang_tidy_command, capture_output=True, check=False)
-    files_to_parse = None
-    if proc.returncode != 0:
-        output_filename_out = output_filename_base.with_suffix(".fail")
-        files_to_parse = output_filename_fixes
-        if not show_stdout:
-            print(
-                f"Running clang-tidy on {clang_tidy_filename} had errors see {output_filename_out}")
-        else:
-            print(f"Running clang-tidy on {clang_tidy_filename}")
-            print(f"{proc.stderr.decode(locale.getpreferredencoding())}")
-            print(f"{proc.stdout.decode(locale.getpreferredencoding())}")
-    else:
-        output_filename_out = output_filename_base.with_suffix(".pass")
-        if not show_stdout:
-            print(f"Running clang-tidy on {clang_tidy_filename} had no errors")
-
-    with open(output_filename_out, 'wb') as output:
-        output.write(proc.stderr)
-        output.write(proc.stdout)
-    return proc.stdout.decode(locale.getpreferredencoding()), files_to_parse
-
-
-def _combine_errors(fixes_filename: str, files_to_parse: List[str]) -> int:
-    failed_files = 0
-    all_fixes = {}
-
-    #loop files_to_parse and count the number of failed_files
-    for item in files_to_parse:
-        if item is None:
-            continue
-        failed_files += 1
-
-        # Read the yaml fixes for the file to combine them with the other suggested fixes
-        with open(item) as input_yml:
-            fixes = yaml.safe_load(input_yml)
-        for fix in fixes['Diagnostics']:
-            fix_data = all_fixes.setdefault(fix["DiagnosticName"], {}).setdefault(
-                fix["FilePath"], {}).setdefault(
-                    fix["FileOffset"], {
-                        "replacements": fix["Replacements"], "message": fix["Message"], "count": 0,
-                        "source_files": []
-                    })
-            fix_data["count"] += 1
-            fix_data["source_files"].append(fixes['MainSourceFile'])
-    with open(fixes_filename, "w") as files_file:
-        json.dump(all_fixes, files_file, indent=4, sort_keys=True)
-
-    return failed_files
-
-
-def __dedup_errors(clang_tidy_errors_threads: List[str]) -> str:
-    #use dict as an 'ordered set'(in python 3.6+), set value to dummy value(true here)
-    error_to_dummy_value = dict()
-    for errs in clang_tidy_errors_threads:
-        if errs:
-            for val in errs.splitlines():
-                error_to_dummy_value[val] = True
-    return os.linesep.join(error_to_dummy_value.keys())
-
-
-def main():
-    """Execute Main entry point."""
-
-    parser = argparse.ArgumentParser(description='Run multithreaded clang-tidy')
-
-    parser.add_argument('-t', "--threads", type=int, default=multiprocessing.cpu_count(),
-                        help="Run with a specific number of threads")
-    parser.add_argument("-d", "--output-dir", type=str, default="clang_tidy_fixes",
-                        help="Directory to write all clang-tidy output to")
-    parser.add_argument("-o", "--fixes-file", type=str, default="clang_tidy_fixes.json",
-                        help="Report json file to write combined fixes to")
-    parser.add_argument("-c", "--compile-commands", type=str, default="compile_commands.json",
-                        help="compile_commands.json file to use to find the files to tidy")
-    parser.add_argument("-q", "--show-stdout", type=bool, default=True,
-                        help="Log errors to console")
-    parser.add_argument("-l", "--log-file", type=str, default="clang_tidy",
-                        help="clang tidy log from evergreen")
-    # TODO: Is there someway to get this without hardcoding this much
-    parser.add_argument("-y", "--clang-tidy-toolchain", type=str, default="v3")
-    parser.add_argument("-f", "--clang-tidy-cfg", type=str, default=".clang-tidy")
-    args = parser.parse_args()
-
-    clang_tidy_binary = f'/opt/mongodbtoolchain/{args.clang_tidy_toolchain}/bin/clang-tidy'
-
-    with open(args.compile_commands) as compile_commands:
-        compile_commands = json.load(compile_commands)
-
-    with open(args.clang_tidy_cfg) as clang_tidy_cfg:
-        clang_tidy_cfg = yaml.safe_load(clang_tidy_cfg)
-    files_to_tidy = list()
-    files_to_parse = list()
-    for file_doc in compile_commands:
-        # A few special cases of files to ignore
-        if not "src/mongo" in file_doc["file"]:
-            continue
-        # TODO SERVER-49884 Remove this when we no longer check in generated Bison.
-        if "parser_gen.cpp" in file_doc["file"]:
-            continue
-        files_to_tidy.append(Path(file_doc["file"]))
-
-    total_jobs = len(files_to_tidy)
-    workers = args.threads
-
-    clang_tidy_errors_futures: List[str] = []
-    clang_tidy_executor_futures: List[futures.ThreadPoolExecutor.submit] = []
-
-    # total completed tasks
-    tasks_completed = 0
-
-    with futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        start_time = time.time()
-
-        # submit all futures
-        for clang_tidy_filename in files_to_tidy:
-            clang_tidy_executor_futures.append(
-                executor.submit(_clang_tidy_executor, clang_tidy_filename, clang_tidy_binary,
-                                clang_tidy_cfg, args.output_dir, args.show_stdout))
-
-        for future in futures.as_completed(clang_tidy_executor_futures):
-            clang_tidy_errors_futures.append(future.result()[0])
-            files_to_parse.append(future.result()[1])
-            tasks_completed += 1
-            pretty_time_duration = str(datetime.timedelta(seconds=time.time() - start_time))
-            print(
-                f" The number of jobs completed is {tasks_completed}/{total_jobs}. Duration {pretty_time_duration}"
-            )
-
-    failed_files = _combine_errors(Path(args.output_dir, args.fixes_file), files_to_parse)
-
-    # Zip up all the files for upload
-    subprocess.run(["tar", "-czvf", args.output_dir + ".tgz", args.output_dir], check=False)
-
-    # create report and dump to report.json
-    error_file_contents = __dedup_errors(clang_tidy_errors_futures)
-    report = make_report(args.log_file, error_file_contents, 1 if failed_files > 0 else 0)
-    try_combine_reports(report)
-    put_report(report)
-
-    return failed_files
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/buildscripts/clang_tidy.sh b/buildscripts/clang_tidy.sh
new file mode 100755
index 00000000000..28f37143775
--- /dev/null
+++ b/buildscripts/clang_tidy.sh
@@ -0,0 +1,42 @@
+set -o errexit
+set -o verbose
+
+CLANG_TIDY_TOOLCHAIN_VERSION="${1:-v3}"
+CLANG_TIDY_FIX_MODE="${2:-scan}"
+
+# Check that the toolchain version the user requested matches the one in compile_commands.json
+TEST_COMMAND="$(jq -r '.[] | .command' compile_commands.json | head -n 1)"
+if [[ "$CLANG_TIDY_TOOLCHAIN_VERSION" != *"-force" ]] && [[ $TEST_COMMAND != "/opt/mongodbtoolchain/$CLANG_TIDY_TOOLCHAIN_VERSION"* ]]; then
+  echo "ERROR: compile commands generated with different toolchain version than $CLANG_TIDY_TOOLCHAIN_VERSION"
+  echo "Run with $CLANG_TIDY_TOOLCHAIN_VERSION-force to run clang-tidy anyways."
+  exit 1
+fi
+
+# if they forced it, extract the raw toolchain version
+if [[ "$CLANG_TIDY_TOOLCHAIN_VERSION" == *"-force" ]]; then
+  # the ?????? here strips off the "-force" suffix by character counting (six characters)
+  CLANG_TIDY_TOOLCHAIN_VERSION=${CLANG_TIDY_TOOLCHAIN_VERSION%??????}
+fi
+
+if [ "$CLANG_TIDY_FIX_MODE" == "fix" ]; then
+  CLANG_TIDY_MAX_ARGS=1
+  CLANG_TIDY_MAX_PROCESSES=1
+  CLANG_TIDY_FIX_MODE="--fix-errors"
+else
+  CLANG_TIDY_MAX_ARGS=32
+  CLANG_TIDY_MAX_PROCESSES=$(grep -c ^processor /proc/cpuinfo)
+  CLANG_TIDY_FIX_MODE=""
+fi
+
+# TODO SERVER-49884 Remove this when we no longer check in generated Bison.
+BISON_GENERATED_PATTERN=parser_gen\.cpp
+
+# clang-tidy's header filter decides which of our header files get scanned: headers in our source
+# directory (the mongo/* regex) and generated headers in the build directory (the build/* regex).
+# NOTE(review): no -header-filter flag is passed below; presumably it is set in .clang-tidy -- confirm.
+jq -r '.[] | .file' compile_commands.json \
+  | grep src/mongo \
+  | grep -v $BISON_GENERATED_PATTERN \
+  | xargs -n $CLANG_TIDY_MAX_ARGS -P $CLANG_TIDY_MAX_PROCESSES -t \
+    /opt/mongodbtoolchain/$CLANG_TIDY_TOOLCHAIN_VERSION/bin/clang-tidy \
+    $CLANG_TIDY_FIX_MODE -p ./compile_commands.json
diff --git a/buildscripts/clang_tidy_condensed.py b/buildscripts/clang_tidy_condensed.py
new file mode 100755
index 00000000000..995ae5f3292
--- /dev/null
+++ b/buildscripts/clang_tidy_condensed.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+"""Runs clang-tidy in parallel and combines the results for easier viewing."""
+
+import argparse
+import datetime
+import json
+import os
+import subprocess
+import sys
+import threading
+import queue
+import time
+from typing import Any, Dict, List, Optional
+import multiprocessing
+from pathlib import Path
+
+import yaml
+
+files_to_tidy = queue.SimpleQueue()
+files_to_parse = queue.SimpleQueue()
+
+
+def _clang_tidy_executor(clang_tidy_binary: str, clang_tidy_cfg: Dict[str, Any], output_dir: str):
+    while True:
+        clang_tidy_filename: Optional[Path] = files_to_tidy.get()
+        if clang_tidy_filename is None:
+            files_to_parse.put(None)
+            files_to_tidy.put(None)
+            break
+
+        print(f"Running clang-tidy on {clang_tidy_filename}")
+        clang_tidy_parent_dir = output_dir / clang_tidy_filename.parent
+        os.makedirs(clang_tidy_parent_dir, exist_ok=True)
+
+        output_filename_base = clang_tidy_parent_dir / clang_tidy_filename.name
+        output_filename_fixes = output_filename_base.with_suffix(".yml")
+        clang_tidy_command = [
+            clang_tidy_binary, clang_tidy_filename, f"-export-fixes={output_filename_fixes}",
+            f"-config={json.dumps(clang_tidy_cfg)}"
+        ]
+        proc = subprocess.run(clang_tidy_command, capture_output=True, check=False)
+        if proc.returncode != 0:
+            output_filename_out = output_filename_base.with_suffix(".fail")
+            files_to_parse.put(output_filename_fixes)
+            print(
+                f"Running clang-tidy on {clang_tidy_filename} had errors see {output_filename_out}")
+        else:
+            output_filename_out = output_filename_base.with_suffix(".pass")
+            print(f"Running clang-tidy on {clang_tidy_filename} had no errors")
+
+        with open(output_filename_out, 'wb') as output:
+            output.write(proc.stderr)
+            output.write(proc.stdout)
+
+
+def _combine_errors(clang_tidy_executors: int, fixes_filename: str) -> int:
+    failed_files = 0
+    all_fixes = {}
+    while clang_tidy_executors > 0:
+        item = files_to_parse.get()
+
+        # Once all running threads say they are done we want to exit
+        if item is None:
+            clang_tidy_executors -= 1
+            continue
+
+        failed_files += 1
+
+        # Read the yaml fixes for the file to combine them with the other suggested fixes
+        with open(item) as input_yml:
+            fixes = yaml.safe_load(input_yml)
+        for fix in fixes['Diagnostics']:
+            fix_data = all_fixes.setdefault(fix["DiagnosticName"], {}).setdefault(
+                fix["FilePath"], {}).setdefault(
+                    fix["FileOffset"], {
+                        "replacements": fix["Replacements"], "message": fix["Message"], "count": 0,
+                        "source_files": []
+                    })
+            fix_data["count"] += 1
+            fix_data["source_files"].append(fixes['MainSourceFile'])
+    with open(fixes_filename, "w") as files_file:
+        json.dump(all_fixes, files_file, indent=4, sort_keys=True)
+
+    return failed_files
+
+
+def _report_status(total_jobs: int, clang_tidy_executor_threads: List[threading.Thread]):
+    start_time = time.time()
+    running_jobs = 1
+    while running_jobs > 0:
+        time.sleep(5)
+        pretty_time_duration = str(datetime.timedelta(seconds=time.time() - start_time))
+        running_jobs = sum(
+            [1 for t in clang_tidy_executor_threads if t.is_alive()])  # Count threads running a job
+        # files_to_tidy contains a None which can be ignored
+        print(
+            f"There are {running_jobs} active jobs. The number of jobs queued is {files_to_tidy.qsize()-1}/{total_jobs}. Duration {pretty_time_duration}."
+        )
+
+
+def main():
+    """Execute Main entry point."""
+
+    parser = argparse.ArgumentParser(description='Run multithreaded clang-tidy')
+
+    parser.add_argument('-t', "--threads", type=int, default=multiprocessing.cpu_count(),
+                        help="Run with a specific number of threads")
+    parser.add_argument("-d", "--output-dir", type=str, default="clang_tidy_fixes",
+                        help="Directory to write all clang-tidy output to")
+    parser.add_argument("-o", "--fixes-file", type=str, default="clang_tidy_fixes.json",
+                        help="Report json file to write combined fixes to")
+    parser.add_argument("-c", "--compile-commands", type=str, default="compile_commands.json",
+                        help="compile_commands.json file to use to find the files to tidy")
+    # TODO: Is there some way to get this without hardcoding this much?
+    parser.add_argument("-y", "--clang-tidy-toolchain", type=str, default="v3")
+    parser.add_argument("-f", "--clang-tidy-cfg", type=str, default=".clang-tidy")
+    args = parser.parse_args()
+
+    clang_tidy_binary = f'/opt/mongodbtoolchain/{args.clang_tidy_toolchain}/bin/clang-tidy'
+
+    with open(args.compile_commands) as compile_commands:
+        compile_commands = json.load(compile_commands)
+
+    with open(args.clang_tidy_cfg) as clang_tidy_cfg:
+        clang_tidy_cfg = yaml.safe_load(clang_tidy_cfg)
+
+    for file_doc in compile_commands:
+        # A few special cases of files to ignore
+        if not "src/mongo" in file_doc["file"]:
+            continue
+        # TODO SERVER-49884 Remove this when we no longer check in generated Bison.
+        if "parser_gen.cpp" in file_doc["file"]:
+            continue
+        files_to_tidy.put(Path(file_doc["file"]))
+
+    total_jobs = files_to_tidy.qsize()
+    files_to_tidy.put(None)
+    workers = args.threads
+
+    clang_tidy_executor_threads: List[threading.Thread] = []
+    for _ in range(workers):
+        clang_tidy_executor_threads.append(
+            threading.Thread(target=_clang_tidy_executor, args=(clang_tidy_binary, clang_tidy_cfg,
+                                                                args.output_dir)))
+        clang_tidy_executor_threads[-1].start()
+
+    report_status_thread = threading.Thread(target=_report_status,
+                                            args=(total_jobs, clang_tidy_executor_threads))
+    report_status_thread.start()
+
+    failed_files = _combine_errors(workers, Path(args.output_dir, args.fixes_file))
+
+    # Join all threads
+    report_status_thread.join()
+    for thread in clang_tidy_executor_threads:
+        thread.join()
+
+    # Zip up all the files for upload
+    subprocess.run(["tar", "-czvf", args.output_dir + ".tgz", args.output_dir], check=False)
+
+    return failed_files
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/buildscripts/simple_report.py b/buildscripts/simple_report.py
index cdfe0cf5427..4364530eed8 100644
--- a/buildscripts/simple_report.py
+++ b/buildscripts/simple_report.py
@@ -51,7 +51,7 @@ def _clean_log_file(log_file: pathlib.Path, dedup_lines: bool) -> str:
     return os.linesep.join(lines)
 
 
-def make_report(test_name: str, log_file_contents: str, exit_code: int) -> Report:
+def _make_report(test_name: str, log_file_contents: str, exit_code: int) -> Report:
     status = "pass" if exit_code == 0 else "fail"
     return Report({
         'failures':
@@ -64,7 +64,7 @@ def make_report(test_name: str, log_file_contents: str, exit_code: int) -> Repor
     })
 
 
-def try_combine_reports(out: Report):
+def _try_combine_reports(out: Report):
     try:
         with open("report.json") as fh:
             report = json.load(fh)
@@ -80,7 +80,7 @@ def _dedup_lines(lines: List[str]) -> List[str]:
     return list(set(lines))
 
 
-def put_report(out: Report):
+def _put_report(out: Report):
     with open("report.json", "w") as fh:
         json.dump(out, fh)
 
@@ -93,9 +93,9 @@ def put_report(out: Report):
 def main(test_name: str, log_file: pathlib.Path, exit_code: int, dedup_lines: bool):
     """Given a test name, path to log file and exit code, generate/append an Evergreen report.json."""
     log_file_contents = _clean_log_file(log_file, dedup_lines)
-    report = make_report(test_name, log_file_contents, exit_code)
-    try_combine_reports(report)
-    put_report(report)
+    report = _make_report(test_name, log_file_contents, exit_code)
+    _try_combine_reports(report)
+    _put_report(report)
 
 
 if __name__ == "__main__":
author	auto-revert-processor <dev-prod-dag@mongodb.com>	2022-09-24 03:59:14 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-09-24 04:28:59 +0000
commit	b463bf5448847019639ff76a70e658dd6837bda7 (patch)
tree	05cc3e5ca383c9499364283a004e57893b94a937 /buildscripts
parent	05cda545eb5d1911753cbd3c8004aac9308d890c (diff)
download	mongo-b463bf5448847019639ff76a70e658dd6837bda7.tar.gz