diff options
Diffstat (limited to 'deps/v8/tools/gcmole/gcmole.py')
-rwxr-xr-x | deps/v8/tools/gcmole/gcmole.py | 590 |
1 files changed, 343 insertions, 247 deletions
diff --git a/deps/v8/tools/gcmole/gcmole.py b/deps/v8/tools/gcmole/gcmole.py index 6961ec4fef..be605d8c55 100755 --- a/deps/v8/tools/gcmole/gcmole.py +++ b/deps/v8/tools/gcmole/gcmole.py @@ -6,14 +6,18 @@ # This is main driver for gcmole tool. See README for more details. # Usage: CLANG_BIN=clang-bin-dir python tools/gcmole/gcmole.py [arm|arm64|ia32|x64] +from contextlib import contextmanager +from contextlib import redirect_stderr from multiprocessing import cpu_count from pathlib import Path +import argparse import collections import difflib +import io import json -import optparse import os +import pickle import re import subprocess import sys @@ -21,17 +25,8 @@ import threading import queue -ArchCfg = collections.namedtuple( - "ArchCfg", ["name", "cpu", "triple", "arch_define", "arch_options"]) - -# TODO(cbruni): use gn desc by default for platform-specific settings -OPTIONS_64BIT = [ - "-DV8_COMPRESS_POINTERS", - "-DV8_COMPRESS_POINTERS_IN_SHARED_CAGE", - "-DV8_EXTERNAL_CODE_SPACE", - "-DV8_SHORT_BUILTIN_CALLS", - "-DV8_SHARED_RO_HEAP", -] +ArchCfg = collections.namedtuple("ArchCfg", + ["name", "cpu", "triple", "arch_options"]) ARCHITECTURES = { "ia32": @@ -39,7 +34,6 @@ ARCHITECTURES = { name="ia32", cpu="x86", triple="i586-unknown-linux", - arch_define="V8_TARGET_ARCH_IA32", arch_options=["-m32"], ), "arm": @@ -47,24 +41,19 @@ ARCHITECTURES = { name="arm", cpu="arm", triple="i586-unknown-linux", - arch_define="V8_TARGET_ARCH_ARM", arch_options=["-m32"], ), - # TODO(cbruni): Use detailed settings: - # arch_options = OPTIONS_64BIT + [ "-DV8_WIN64_UNWINDING_INFO" ] "x64": ArchCfg( name="x64", cpu="x64", triple="x86_64-unknown-linux", - arch_define="V8_TARGET_ARCH_X64", arch_options=[]), "arm64": ArchCfg( name="arm64", cpu="arm64", triple="x86_64-unknown-linux", - arch_define="V8_TARGET_ARCH_ARM64", arch_options=[], ), } @@ -86,6 +75,9 @@ def fatal(format): def make_clang_command_line(plugin, plugin_args, options): + with open(options.v8_build_dir / 'v8_gcmole.args') as f: + generated_args = f.read().strip().split() + arch_cfg = ARCHITECTURES[options.v8_target_cpu] prefixed_plugin_args = [] if plugin_args: @@ -97,7 +89,6 @@ def make_clang_command_line(plugin, plugin_args, options): arg, ] log("Using generated files in {}", options.v8_build_dir / 'gen') - icu_src_dir = options.v8_root_dir / 'third_party/icu/source' return ([ options.clang_bin_dir / "clang++", "-std=c++17", @@ -117,18 +108,8 @@ def make_clang_command_line(plugin, plugin_args, options): arch_cfg.triple, "-fno-exceptions", "-Wno-everything", - "-D", - arch_cfg.arch_define, - "-DENABLE_DEBUGGER_SUPPORT", - "-DV8_ENABLE_WEBASSEMBLY", "-DV8_GC_MOLE", - "-DV8_INTL_SUPPORT", - "-I{}".format(options.v8_root_dir), - "-I{}".format(options.v8_root_dir / 'include'), - "-I{}".format(options.v8_build_dir / 'gen'), - "-I{}".format(icu_src_dir / 'common'), - "-I{}".format(icu_src_dir / 'i18n'), - ] + arch_cfg.arch_options) + ] + generated_args + arch_cfg.arch_options) def invoke_clang_plugin_for_file(filename, cmd_line, verbose): @@ -222,56 +203,44 @@ def invoke_clang_plugin_for_each_file(filenames, plugin, plugin_args, options): # ----------------------------------------------------------------------------- -def parse_gn_file(options, for_test): - if for_test: - return {"all": [options.v8_root_dir / "tools/gcmole/gcmole-test.cc"]} - result = {} +def build_file_list(options): + """Calculates the list of source files to be checked with gcmole. + + The list comprises all files from marked source sections in the + listed BUILD.gn files. All files preceeded by the following comment and + until the end of the source section are used: + ### gcmole(arch) ### + Where arch can either be all (all architectures) or one of the supported V8 + architectures. + + The structure of these directives is also checked by presubmit via: + tools/v8_presubmit.py::GCMoleProcessor. + + Returns: List of file paths (of type Path). + """ + if options.test_run: + return [options.v8_root_dir / "tools/gcmole/gcmole-test.cc"] + result = [] gn_files = [ ("BUILD.gn", re.compile('"([^"]*?\.cc)"'), ""), ("test/cctest/BUILD.gn", re.compile('"(test-[^"]*?\.cc)"'), Path("test/cctest/")), ] - for filename, pattern, prefix in gn_files: + gn_re = re.compile(f"### gcmole\((all|{options.v8_target_cpu})\) ###(.*?)\]", + re.MULTILINE | re.DOTALL) + for filename, file_pattern, prefix in gn_files: path = options.v8_root_dir / filename with open(path) as gn_file: gn = gn_file.read() - for condition, sources in re.findall("### gcmole\((.*?)\) ###(.*?)\]", gn, - re.MULTILINE | re.DOTALL): - if condition not in result: - result[condition] = [] - for file in pattern.findall(sources): - result[condition].append(options.v8_root_dir / prefix / file) - - return result - - -def evaluate_condition(cond, props): - if cond == "all": - return True - - m = re.match("(\w+):(\w+)", cond) - if m is None: - fatal("failed to parse condition: {}", cond) - p, v = m.groups() - if p not in props: - fatal("undefined configuration property: {}", p) - - return props[p] == v + for _, sources in gn_re.findall(gn): + for file in file_pattern.findall(sources): + result.append(options.v8_root_dir / prefix / file) + # Filter files of current shard if running on multiple hosts. + def is_in_shard(index): + return (index % options.shard_count) == options.shard_index -def build_file_list(options, for_test): - sources = parse_gn_file(options, for_test) - props = { - "os": "linux", - "arch": options.v8_target_cpu, - "mode": "debug", - "simulator": "" - } - ret = [] - for condition, files in list(sources.items()): - if evaluate_condition(condition, props): - ret += files - return ret + return [f for i, f in enumerate(result) if is_in_shard(i)] # ----------------------------------------------------------------------------- @@ -326,18 +295,11 @@ IS_SPECIAL_WITH_ALLOW_LIST = merge_regexp({ }) -class GCSuspectsCollector: +class CallGraph: - def __init__(self, options): - self.gc = {} - self.gc_caused = collections.defaultdict(lambda: set()) - self.funcs = {} + def __init__(self): + self.funcs = collections.defaultdict(set) self.current_caller = None - self.allowlist = options.allowlist - self.is_special = IS_SPECIAL_WITH_ALLOW_LIST if self.allowlist else IS_SPECIAL_WITHOUT_ALLOW_LIST - - def add_cause(self, name, cause): - self.gc_caused[name].add(cause) def parse(self, lines): for funcname in lines: @@ -345,58 +307,114 @@ class GCSuspectsCollector: continue if funcname[0] != "\t": - self.resolve(funcname) + # Always inserting the current caller makes the serialized version + # more compact. + self.funcs[funcname] self.current_caller = funcname else: - name = funcname[1:] - callers_for_name = self.resolve(name) - callers_for_name.add(self.current_caller) + self.funcs[funcname[1:]].add(self.current_caller) + + def to_file(self, file_name): + """Store call graph in file 'file_name'.""" + log(f"Writing serialized callgraph to {file_name}") + with open(file_name, 'wb') as f: + pickle.dump(self, f) + + @staticmethod + def from_file(file_name): + """Restore call graph from file 'file_name'.""" + log(f"Reading serialized callgraph from {file_name}") + with open(file_name, 'rb') as f: + return pickle.load(f) + + @staticmethod + def from_files(*file_names): + """Merge multiple call graphs from a list of files.""" + callgraph = CallGraph() + for file_name in file_names: + funcs = CallGraph.from_file(file_name).funcs + for callee, callers in funcs.items(): + callgraph.funcs[callee].update(callers) + return callgraph + + +class GCSuspectsCollector: + + def __init__(self, options, funcs): + self.gc = {} + self.gc_caused = collections.defaultdict(set) + self.funcs = funcs + if options.allowlist: + self.is_special = IS_SPECIAL_WITH_ALLOW_LIST + else: + self.is_special = IS_SPECIAL_WITHOUT_ALLOW_LIST + + def add_cause(self, name, cause): + self.gc_caused[name].add(cause) def resolve(self, name): - if name not in self.funcs: - self.funcs[name] = set() - m = self.is_special.search(name) - if m: - if m.group("gc"): - self.gc[name] = True - self.add_cause(name, "<GC>") - elif m.group("safepoint"): - self.gc[name] = True - self.add_cause(name, "<Safepoint>") - elif m.group("allow"): - self.gc[name] = False - - return self.funcs[name] + m = self.is_special.search(name) + if not m: + return + + if m.group("gc"): + self.gc[name] = True + self.add_cause(name, "<GC>") + elif m.group("safepoint"): + self.gc[name] = True + self.add_cause(name, "<Safepoint>") + elif m.group("allow"): + self.gc[name] = False def propagate(self): log("Propagating GC information") - def mark(funcname, callers): - for caller in callers: + def mark(funcname): + for caller in self.funcs[funcname]: if caller not in self.gc: self.gc[caller] = True - mark(caller, self.funcs[caller]) + mark(caller) self.add_cause(caller, funcname) - for funcname, callers in list(self.funcs.items()): + for funcname in self.funcs: + self.resolve(funcname) + + for funcname in self.funcs: if self.gc.get(funcname, False): - mark(funcname, callers) + mark(funcname) + + +def generate_callgraph(files, options): + """Construct a (potentially partial) call graph from a subset of + source files. + """ + callgraph = CallGraph() + + log(f"Building call graph for {options.v8_target_cpu}") + for _, stdout, _ in invoke_clang_plugin_for_each_file( + files, "dump-callees", [], options): + callgraph.parse(stdout.splitlines()) + return callgraph -def generate_gc_suspects(files, options): - # Reset the global state. - collector = GCSuspectsCollector(options) - log("Building GC Suspects for {}", options.v8_target_cpu) - for _, stdout, _ in invoke_clang_plugin_for_each_file(files, "dump-callees", - [], options): - collector.parse(stdout.splitlines()) +def generate_gc_suspects_from_callgraph(callgraph, options): + """Calculate and store gc-suspect information from a given call graph.""" + collector = GCSuspectsCollector(options, callgraph.funcs) collector.propagate() # TODO(cbruni): remove once gcmole.cc is migrated write_gcmole_results(collector, options, options.v8_root_dir) write_gcmole_results(collector, options, options.out_dir) +def generate_gc_suspects_from_files(options): + """Generate file list and corresponding gc-suspect information.""" + files = build_file_list(options) + call_graph = generate_callgraph(files, options) + generate_gc_suspects_from_callgraph(call_graph, options) + return files + + def write_gcmole_results(collector, options, dst): # gcsuspects contains a list("mangled_full_name,name") of all functions that # could cause a gc (directly or indirectly). @@ -437,20 +455,12 @@ def write_gcmole_results(collector, options, dst): # Analysis -def check_correctness_for_arch(options, for_test): - files = build_file_list(options, for_test) - - if not options.reuse_gcsuspects: - generate_gc_suspects(files, options) - else: - log("Reusing GCSuspects for {}", options.v8_target_cpu) - +def check_correctness_for_arch(files, options): processed_files = 0 errors_found = False - output = "" log("Searching for evaluation order problems " + - (' and dead variables' if options.dead_vars else '') + "for" + + ("and dead variables " if options.dead_vars else "") + "for " + options.v8_target_cpu) plugin_args = [] if options.dead_vars: @@ -465,26 +475,39 @@ def check_correctness_for_arch(options, for_test): if not errors_found: errors_found = re.search("^[^:]+:\d+:\d+: (warning|error)", stderr, re.MULTILINE) is not None - if for_test: - output = output + stderr - else: - sys.stdout.write(stderr) + sys.stderr.write(stderr) log("Done processing {} files.", processed_files) log("Errors found" if errors_found else "No errors found") - return errors_found, output + return errors_found + + +def clean_test_output(output): + """Substitute line number patterns for files except gcmole-test.cc, as + otherwise unrelated code changes require a rebaseline of test expectations. + """ + return re.sub( + r'(?<!gcmole-test\.cc):\d*:\d*:', + ':<number>:<number>:', + output) -def test_run(options): +def has_unexpected_errors(options, errors_found, file_io): + """Returns True if error state isn't as expected, False otherwise. + + In test-run mode, we expect certain errors and return False if expectations + are met. + """ if not options.test_run: - return True + return errors_found + log("Test Run") - errors_found, output = check_correctness_for_arch(options, True) + output = clean_test_output(file_io.getvalue()) if not errors_found: log("Test file should produce errors, but none were found. Output:") print(output) - return False + return True new_file = options.out_dir / "test-expectations-gen.txt" with open(new_file, "w") as f: @@ -500,9 +523,9 @@ def test_run(options): print("#" * 79) log("Output mismatch from running tests.") log("Please run gcmole manually with --test-run --verbose.") - log("Expected: " + expected_file) - log("New: " + new_file) - log("*Diff:* " + diff_file) + log(f"Expected: {expected_file}") + log(f"New: {new_file}") + log(f"*Diff:* {diff_file}") print("#" * 79) for line in difflib.unified_diff( expectations.splitlines(), @@ -515,17 +538,17 @@ def test_run(options): print("#" * 79) log("Full output") - log("Expected: " + expected_file) - log("Diff: " + diff_file) - log("*New:* " + new_file) + log(f"Expected: {expected_file}") + log(f"Diff: {diff_file}") + log(f"*New*: {new_file}") print("#" * 79) print(output) print("#" * 79) - return False + return True log("Tests ran successfully") - return True + return False # ============================================================================= @@ -533,123 +556,186 @@ def relative_parents(path, level=0): return Path(os.path.relpath(str(path.resolve().parents[level]))) -def main(args): +def main(argv): # Get a clean parent path relative to PWD default_root_dir = relative_parents(Path(__file__), level=2) - if len(args) >= 1: - default_gcmole_dir = relative_parents(Path(args[0])) + if len(argv) >= 1: + default_gcmole_dir = relative_parents(Path(argv[0])) if default_gcmole_dir or not default_gcmole_dir.exists(): default_gcmole_dir = default_root_dir / 'tools' / 'gcmole' - - parser = optparse.OptionParser() - archs = list(ARCHITECTURES.keys()) - parser.add_option( - "--v8-root-dir", - metavar="DIR", - default=default_root_dir, - help="V8 checkout directory. Default: '{}'".format( - default_root_dir.absolute())) - parser.add_option( - "--v8-target-cpu", - type="choice", - default="x64", - choices=archs, - help="Tested CPU architecture. Choices: {}".format(archs), - metavar="CPU") default_clang_bin_dir = default_gcmole_dir / 'gcmole-tools/bin' - parser.add_option( - "--clang-bin-dir", - metavar="DIR", - help="Build dir of the custom clang version for gcmole." + \ - "Default: env['CLANG_DIR'] or '{}'".format(default_clang_bin_dir)) - parser.add_option( - "--clang-plugins-dir", - metavar="DIR", - help="Containing dir for libgcmole.so." - "Default: env['CLANG_PLUGINS'] or '{}'".format(default_gcmole_dir)) - parser.add_option( - "--v8-build-dir", - metavar="BUILD_DIR", - help="GN build dir for v8. Default: 'out/CPU.Release'. " - "Config must match cpu specified by --v8-target-cpu") - parser.add_option( - "--out-dir", - metavar="DIR", - help="Output location for the gcsuspect and gcauses file." - "Default: BUILD_DIR/gen/tools/gcmole") - parser.add_option( - "--is-bot", - action="store_true", - default=False, - help="Flag for setting build bot specific settings.") - - group = optparse.OptionGroup(parser, "GCMOLE options") - group.add_option( - "--reuse-gcsuspects", - action="store_true", - default=False, - help="Don't build gcsuspects file and reuse previously generated one.") - group.add_option( - "--sequential", - action="store_true", - default=False, - help="Don't use parallel python runner.") - group.add_option( - "--verbose", - action="store_true", - default=False, - help="Print commands to console before executing them.") - group.add_option( - "--no-dead-vars", - action="store_false", - dest="dead_vars", - default=True, - help="Don't perform dead variable analysis.") - group.add_option( - "--verbose-trace", - action="store_true", - default=False, - help="Enable verbose tracing from the plugin itself." - "This can be useful to debug finding dead variable.") - group.add_option( - "--no-allowlist", - action="store_true", - default=True, - dest="allowlist", - help="When building gcsuspects allowlist certain functions as if they can be " - "causing GC. Currently used to reduce number of false positives in dead " - "variables analysis. See TODO for ALLOWLIST in gcmole.py") - group.add_option( - "--test-run", - action="store_true", - default=False, - help="Test gcmole on tools/gcmole/gcmole-test.cc") - parser.add_option_group(group) - - (options, args) = parser.parse_args() - - if not options.v8_target_cpu: - # Backwards compatibility - if len(args) > 0 and args[0] in archs: - options.v8_target_cpu = args[0] - log("Using --v8-target-cpu={}", options.v8_target_cpu) - else: - parser.error("Missing --v8-target-cpu option") + + def add_common_args(parser): + archs = list(ARCHITECTURES.keys()) + parser.add_argument( + "--v8-root-dir", + metavar="DIR", + default=default_root_dir, + help="V8 checkout directory. Default: '{}'".format( + default_root_dir.absolute())) + parser.add_argument( + "--v8-target-cpu", + default="x64", + choices=archs, + help="Tested CPU architecture. Choices: {}".format(archs), + metavar="CPU") + parser.add_argument( + "--clang-bin-dir", + metavar="DIR", + help="Build dir of the custom clang version for gcmole." + \ + "Default: env['CLANG_DIR'] or '{}'".format(default_clang_bin_dir)) + parser.add_argument( + "--clang-plugins-dir", + metavar="DIR", + help="Containing dir for libgcmole.so." + "Default: env['CLANG_PLUGINS'] or '{}'".format(default_gcmole_dir)) + parser.add_argument( + "--v8-build-dir", + metavar="BUILD_DIR", + help="GN build dir for v8. Default: 'out/CPU.Release'. " + "Config must match cpu specified by --v8-target-cpu") + parser.add_argument( + "--out-dir", + metavar="DIR", + help="Output location for the gcsuspect and gcauses file." + "Default: BUILD_DIR/gen/tools/gcmole") + parser.add_argument( + "--is-bot", + action="store_true", + default=False, + help="Flag for setting build bot specific settings.") + parser.add_argument( + "--shard-count", + default=1, + type=int, + help="Number of tasks the current action (e.g. collect or check) " + "is distributed to.") + parser.add_argument( + "--shard-index", + default=0, + type=int, + help="Index of the current task (in [0..shard-count-1]) if the " + "overall action is distributed (shard-count > 1).") + + group = parser.add_argument_group("GCMOLE options") + group.add_argument( + "--sequential", + action="store_true", + default=False, + help="Don't use parallel python runner.") + group.add_argument( + "--verbose", + action="store_true", + default=False, + help="Print commands to console before executing them.") + group.add_argument( + "--no-dead-vars", + action="store_false", + dest="dead_vars", + default=True, + help="Don't perform dead variable analysis.") + group.add_argument( + "--verbose-trace", + action="store_true", + default=False, + help="Enable verbose tracing from the plugin itself." + "This can be useful to debug finding dead variable.") + group.add_argument( + "--no-allowlist", + action="store_true", + default=True, + dest="allowlist", + help="When building gcsuspects allowlist certain functions as if they can be " + "causing GC. Currently used to reduce number of false positives in dead " + "variables analysis. See TODO for ALLOWLIST in gcmole.py") + group.add_argument( + "--test-run", + action="store_true", + default=False, + help="Test gcmole on tools/gcmole/gcmole-test.cc") + + parser = argparse.ArgumentParser() + subps = parser.add_subparsers() + + subp = subps.add_parser( + "full", description="Run both gcmole analysis passes.") + add_common_args(subp) + subp.set_defaults(func=full_run) + + subp = subps.add_parser( + "collect", + description="Construct call graph from source files. " + "The action can be distributed using --shard-count and " + "--shard-index.") + add_common_args(subp) + subp.set_defaults(func=collect_run) + subp.add_argument( + "--output", + required=True, + help="Path to a file where to store the constructed call graph") + + subp = subps.add_parser( + "merge", + description="Merge partial call graphs and propagate gc suspects.") + add_common_args(subp) + subp.set_defaults(func=merge_run) + subp.add_argument( + "--input", + action='append', + required=True, + help="Path to a file containing a partial call graph stored by " + "'collect'. Repeat for multiple files.") + + subp = subps.add_parser( + "check", + description="Check for problems using previously collected gc-suspect " + "information. The action can be distributed using " + "--shard-count and --shard-index.") + add_common_args(subp) + subp.set_defaults(func=check_run) + + options = parser.parse_args(argv[1:]) verify_and_convert_dirs(parser, options, default_gcmole_dir, default_clang_bin_dir) verify_clang_plugin(parser, options) prepare_gcmole_files(options) verify_build_config(parser, options) + override_env_options(options) - any_errors_found = False - if not test_run(options): - any_errors_found = True - else: - errors_found, output = check_correctness_for_arch(options, False) - any_errors_found = any_errors_found or errors_found + options.func(options) + + +@contextmanager +def maybe_redirect_stderr(options): + file_io = io.StringIO() if options.test_run else sys.stderr + with redirect_stderr(file_io) as f: + yield f + + +def check_files(options, files): + with maybe_redirect_stderr(options) as file_io: + errors_found = check_correctness_for_arch(files, options) + sys.exit(has_unexpected_errors(options, errors_found, file_io)) + + +def full_run(options): + check_files(options, generate_gc_suspects_from_files(options)) - sys.exit(1 if any_errors_found else 0) + +def collect_run(options): + files = build_file_list(options) + callgraph = generate_callgraph(files, options) + callgraph.to_file(options.output) + + +def merge_run(options): + generate_gc_suspects_from_callgraph( + CallGraph.from_files(*options.input), options) + + +def check_run(options): + check_files(options, build_file_list(options)) def verify_and_convert_dirs(parser, options, default_tools_gcmole_dir, @@ -700,13 +786,15 @@ def verify_and_convert_dirs(parser, options, default_tools_gcmole_dir, else: options.out_dir = Path(options.out_dir) - for flag in [ - "--v8-root-dir", "--v8-build-dir", "--clang-bin-dir", - "--clang-plugins-dir", "--out-dir" + for flag, path in [ + ("--v8-root-dir", options.v8_root_dir), + ("--v8-build-dir", options.v8_build_dir), + ("--clang-bin-dir", options.clang_bin_dir), + ("--clang-plugins-dir", options.clang_plugins_dir), + ("--out-dir", options.out_dir), ]: - dir = getattr(options, parser.get_option(flag).dest) - if not dir.is_dir(): - parser.error("{}='{}' does not exist!".format(flag, dir)) + if not path.is_dir(): + parser.error(f"{flag}='{path}' does not exist!") def verify_clang_plugin(parser, options): @@ -753,5 +841,13 @@ def verify_build_config(parser, options): options.v8_build_dir, options.v8_target_cpu, found_cpu)) +def override_env_options(options): + """Set shard options if passed as gtest environment vars on bots.""" + options.shard_count = int( + os.environ.get('GTEST_TOTAL_SHARDS', options.shard_count)) + options.shard_index = int( + os.environ.get('GTEST_SHARD_INDEX', options.shard_index)) + + if __name__ == "__main__": main(sys.argv) |