diff options
author | Richard Samuels <richard.l.samuels@gmail.com> | 2022-07-28 16:40:19 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-07-28 17:57:53 +0000 |
commit | 073d0200f44cf057b4957735959f344f95bd9cb4 (patch) | |
tree | b0a80a5a15024ea84ed87c37851ec0d9d579270d /site_scons | |
parent | ac71e4974a705ba7e29dabe9d8f6d4bdbc3113e1 (diff) | |
download | mongo-073d0200f44cf057b4957735959f344f95bd9cb4.tar.gz |
SERVER-67056 Create artifact metrics
Diffstat (limited to 'site_scons')
6 files changed, 294 insertions, 2 deletions
```diff
diff --git a/site_scons/site_tools/build_metrics/__init__.py b/site_scons/site_tools/build_metrics/__init__.py
index f987c9b99eb..aa51a0c4413 100644
--- a/site_scons/site_tools/build_metrics/__init__.py
+++ b/site_scons/site_tools/build_metrics/__init__.py
@@ -26,6 +26,7 @@ import json
 import os
 import sys
 import time
+from timeit import default_timer as timer
 
 from jsonschema import validate
 import psutil
@@ -33,6 +34,7 @@ import psutil
 from .util import add_meta_data, get_build_metric_dict, CaptureAtexits
 from .memory import MemoryMonitor
 from .per_action_metrics import PerActionMetrics
+from .artifacts import CollectArtifacts
 
 _SEC_TO_NANOSEC_FACTOR = 1000000000.0
 _METRICS_COLLECTORS = []
@@ -42,7 +44,10 @@ def finalize_build_metrics(env):
     metrics = get_build_metric_dict()
     metrics['end_time'] = time.time_ns()
     for m in _METRICS_COLLECTORS:
+        start_time = timer()
+        sys.stdout.write(f"Processing {m.get_name()}...")
         key, value = m.finalize()
+        sys.stdout.write(f" {timer() - start_time}s\n")
         metrics[key] = value
 
     with open(os.path.join(os.path.dirname(__file__), "build_metrics_format.schema")) as f:
@@ -81,8 +86,19 @@ def generate(env, **kwargs):
     _METRICS_COLLECTORS = [
         MemoryMonitor(psutil.Process().memory_info().vms),
         PerActionMetrics(),
+        CollectArtifacts(env)
     ]
 
 
 def exists(env):
     return True
+
+
+def options(opts):
+    """
+    Add command line Variables for build metrics tool.
+    """
+    opts.AddVariables(
+        ("BUILD_METRICS_ARTIFACTS_DIR", "Path to scan for artifacts after the build has stopped."),
+        ("BUILD_METRICS_BLOATY", "Path to the bloaty bin"),
+    )
diff --git a/site_scons/site_tools/build_metrics/artifacts.py b/site_scons/site_tools/build_metrics/artifacts.py
new file mode 100644
index 00000000000..7adf95336fe
--- /dev/null
+++ b/site_scons/site_tools/build_metrics/artifacts.py
@@ -0,0 +1,219 @@
+import os
+import glob
+import subprocess
+import csv
+import io
+import enum
+import platform
+import puremagic
+import pathlib
+from typing import Optional
+
+from SCons.Node.FS import File, Dir
+from typing_extensions import TypedDict
+from .util import get_build_metric_dict
+from .protocol import BuildMetricsCollector
+
+
+class ArtifactType(str, enum.Enum):
+    UNKNOWN = "unknown"
+    PROGRAM = "Program"  # .exe
+    LIBRARY = "Library"  # .so, .a
+    ARCHIVE = "archive"  # .zip, .tgz, not .a
+    OBJECT = "Object"  # .o
+    TEXT = "text"  # .h, .hpp, .cpp
+
+
+# Types to run bloaty against
+ARTIFACT_BIN_TYPES = [ArtifactType.PROGRAM, ArtifactType.LIBRARY, ArtifactType.OBJECT]
+
+
+class BinSize(TypedDict):
+    vmsize: int
+    filesize: int
+
+
+class BinMetrics(TypedDict, total=False):
+    text: BinSize
+    data: BinSize
+    rodata: BinSize
+    bss: BinSize
+    debug: BinSize
+    symtab: BinSize
+    dyntab: BinSize
+
+
+def _run_bloaty(bloaty, target) -> Optional[BinMetrics]:
+    out = BinMetrics()
+    try:
+        # -n 0 -> do not collapse small sections into a section named [Other]
+        # --csv -> generate csv output to stdout
+        # -d sections -> only list sections, not symbols
+        proc = subprocess.run([bloaty, "-n", "0", "--csv", "-d", "sections",
+                               str(target)], capture_output=True, universal_newlines=True)
+        if proc.returncode != 0:
+            # if we run bloaty against a thin archive, it will fail. Detect
+            # this and allow thin archives to pass, otherwise raise an
+            # exception.
+            # Note that our thin_archive tool sets the thin_archive
+            # attribute to True
+            if proc.stderr.startswith("bloaty: unknown file type for file") and getattr(
+                    target.attributes, "thin_archive", False):
+                # this is a thin archive, pass it
+                return None
+
+            raise RuntimeError(f"Failed to call bloaty on '{str(target)}': {proc.stderr}")
+
+        for row in csv.DictReader(proc.stdout.splitlines()):
+            # sections,vmsize,filesize
+            section = row['sections']
+            vmsize = int(row['vmsize'])
+            filesize = int(row['filesize'])
+            binsize = BinSize(vmsize=vmsize, filesize=filesize)
+            if section == ".text":
+                out["text"] = binsize
+            elif section == ".data":
+                out["data"] = binsize
+            elif section == ".rodata":
+                out["rodata"] = binsize
+            elif section == ".bss":
+                out["bss"] = binsize
+            elif section.startswith(".debug"):
+                # there are multiple sections that start with .debug, and we
+                # need to sum them up.
+                if "debug" not in out:
+                    out["debug"] = BinSize(vmsize=0, filesize=0)
+                out["debug"]["vmsize"] += vmsize
+                out["debug"]["filesize"] += filesize
+            elif section == ".symtab":
+                out["symtab"] = binsize
+            elif section == ".dyntab":
+                out["dyntab"] = binsize
+
+        return out
+
+    except FileNotFoundError:
+        if not _run_bloaty.printed_missing_bloaty_warning:
+            print(
+                "WARNING: could not find the bloaty binary. Binary section metrics will not be collected."
+            )
+            _run_bloaty.printed_missing_bloaty_warning = True
+        return None
+
+
+_run_bloaty.printed_missing_bloaty_warning = False
+
+
+class Artifact(TypedDict, total=False):
+    array_index: int
+    name: str
+    type: str
+    size: int
+    bin_metrics: BinMetrics
+
+
+# First key: platform.system()
+# Tuple key 1: ArtifactType
+# Tuple Key 2: string to search for
+_PLATFORM_LIBMAGIC_BINARY_IDENTITIES = {
+    "Windows": [(ArtifactType.LIBRARY, "executable (DLL)"), (ArtifactType.PROGRAM, "executable")],
+    "Linux": [(ArtifactType.PROGRAM, "interpreter"), (ArtifactType.LIBRARY, "shared object")],
+    "Darwin": [(ArtifactType.PROGRAM, "Mach-O universal binary"),
+               (ArtifactType.LIBRARY, "linked shared library")],
+}
+
+_ARTIFACT_TYPE_FROM_BUILDER = {
+    "SharedObject": ArtifactType.OBJECT,  # .dyn.o
+    "StaticObject": ArtifactType.OBJECT,  # .o
+    "StaticLibrary": ArtifactType.LIBRARY,  # .a
+    "Idlc": ArtifactType.TEXT,  # _gen.{h,cpp}
+    "Program": ArtifactType.PROGRAM,  # .exe/*nix binaries
+    "Substfile": ArtifactType.TEXT,  # build/opt/mongo/config.h and others
+    "InstallBuilder": ArtifactType.TEXT,  # build/opt/third_party/wiredtiger/wiredtiger_ext.h
+    "Textfile": ArtifactType.TEXT,  # build/opt/third_party/third_party_shim.cpp
+}
+
+_TEXT_IDENTIFIERS = ["ASCII text", "Unicode text"]
+
+_EXTENSION_FALLBACK = {
+    ".cpp": ArtifactType.TEXT,
+    ".h": ArtifactType.TEXT,
+    ".hpp": ArtifactType.TEXT,
+    ".js": ArtifactType.TEXT,
+    ".idl": ArtifactType.TEXT,
+    ".so": ArtifactType.LIBRARY,
+    ".o": ArtifactType.OBJECT,
+
+    # Windows
+    ".obj": ArtifactType.OBJECT,
+    ".lib": ArtifactType.LIBRARY,
+    # ilk, exp, pdb and res files on Windows have no appropriate tag, so we
+    # allow them to fallthrough to UNKNOWN
+}
+
+
+class CollectArtifacts(BuildMetricsCollector):
+    def __init__(self, env):
+        self._env = env
+        self._env = env
+        self._build_dir = env.get("BUILD_METRICS_ARTIFACTS_DIR", env.Dir('#').abspath)
+        self._artifacts = []
+        self._bloaty_bin = env.get("BUILD_METRICS_BLOATY", "bloaty")
+        self._metrics = {"total_artifact_size": 0, "num_artifacts": 0, "artifacts": []}
+
+    def get_name(self):
+        return "CollectArtifacts"
+
+    def walk(self, dirname):
+        for root, dirs, files in os.walk(dirname):
+            self._artifacts += list(map(lambda x: os.path.join(root, x), files))
+
+    def finalize(self):
+        self.walk(self._env.Dir(self._env.subst(self._build_dir)).abspath)
+
+        for artifact in self._artifacts:
+            artifact_dict = self._identify_artifact(artifact)
+            artifact_dict["array_index"] = len(self._metrics["artifacts"])
+            self._metrics["artifacts"].append(artifact_dict)
+            self._metrics["total_artifact_size"] += artifact_dict["size"]
+        self._metrics["num_artifacts"] = len(self._metrics["artifacts"])
+        return "artifact_metrics", self._metrics
+
+    def _identify_artifact(self, file_) -> Artifact:
+        def _type_from_builder(builder) -> ArtifactType:
+            name = builder.get_name(self._env)
+            return _ARTIFACT_TYPE_FROM_BUILDER.get(name, ArtifactType.UNKNOWN)
+
+        type_ = ArtifactType.UNKNOWN
+        file_str = str(file_)
+        node = self._env.File(file_)
+        builder = node.get_builder()
+        if builder is not None:
+            type_ = _type_from_builder(builder)
+
+        if type_ == ArtifactType.UNKNOWN:
+            try:
+                magic_out = puremagic.from_file(file_str)
+                system = platform.system()
+                for search_type in _PLATFORM_LIBMAGIC_BINARY_IDENTITIES.get(system):
+                    if search_type[1] in magic_out:
+                        type_ = search_type[0]
+                        break
+
+                if type_ == ArtifactType.UNKNOWN and any(s in magic_out for s in _TEXT_IDENTIFIERS):
+                    type_ = ArtifactType.TEXT
+            except puremagic.main.PureError:
+                # exception means that puremagic failed to id the filetype. We'll
+                # fallback to file extension in this case.
+                pass
+            if type_ == ArtifactType.UNKNOWN:
+                type_ = _EXTENSION_FALLBACK.get(pathlib.Path(file_str).suffix, ArtifactType.UNKNOWN)
+
+        out = Artifact({"name": file_, "type": type_, "size": node.get_size()})
+
+        if type_ in ARTIFACT_BIN_TYPES:
+            bin_metrics = _run_bloaty(self._bloaty_bin, node)
+            if bin_metrics is not None:
+                out["bin_metrics"] = bin_metrics
+
+        return out
diff --git a/site_scons/site_tools/build_metrics/build_metrics_format.schema b/site_scons/site_tools/build_metrics/build_metrics_format.schema
index 61343dc819f..559a1ee90a3 100644
--- a/site_scons/site_tools/build_metrics/build_metrics_format.schema
+++ b/site_scons/site_tools/build_metrics/build_metrics_format.schema
@@ -7,6 +7,18 @@
         "memory": {
             "type": "integer",
             "description": "Virtual memory used in bytes"
+        },
+        "bytes": {
+            "type": "integer",
+            "description": "Size in bytes",
+            "minimum": 0
+        },
+        "binsize": {
+            "type": "object",
+            "properties": {
+                "filesize": { "$ref": "#/$defs/bytes" },
+                "vmsize": { "$ref": "#/$defs/bytes" }
+            }
         }
     },
     "type" : "object",
@@ -34,6 +46,41 @@
                 "start_mem": { "$ref": "#/$defs/memory" }
             }
         },
+        "artifact_metrics": {
+            "type": "object",
+            "properties": {
+                "total_artifact_size": { "$ref": "#/$defs/bytes" },
+                "num_artifacts": { "type": "integer" },
+                "artifacts": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "required": ["array_index", "name", "type", "size"],
+                        "properties": {
+                            "array_index": { "type": "integer" },
+                            "name": { "type": "string" },
+                            "type": {
+                                "type": "string",
+                                "enum": ["Object", "Library", "Program", "text", "json", "archive", "unknown"]
+                            },
+                            "size": { "$ref": "#/$defs/bytes" },
+                            "bin_metrics": {
+                                "type": "object",
+                                "properties": {
+                                    "text": { "$ref": "#/$defs/binsize" },
+                                    "data": { "$ref": "#/$defs/binsize" },
+                                    "rodata": { "$ref": "#/$defs/binsize" },
+                                    "bss": { "$ref": "#/$defs/binsize" },
+                                    "debug": { "$ref": "#/$defs/binsize" },
+                                    "symtab": { "$ref": "#/$defs/binsize" },
+                                    "dyntab": { "$ref": "#/$defs/binsize" }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        },
         "build_tasks" : {
             "type": "array",
             "task": {
diff --git a/site_scons/site_tools/build_metrics/memory.py b/site_scons/site_tools/build_metrics/memory.py
index f3338bbf7ee..33c56a2c4e0 100644
--- a/site_scons/site_tools/build_metrics/memory.py
+++ b/site_scons/site_tools/build_metrics/memory.py
@@ -19,6 +19,9 @@ class MemoryMonitor(BuildMetricsCollector):
         self._thread = threading.Thread(target=self.memory_monitor, daemon=True)
         self._thread.start()
 
+    def get_name(self):
+        return "System Memory Monitor"
+
     def finalize(self):
         self._stop = True
         self._record_data_point()
diff --git a/site_scons/site_tools/build_metrics/per_action_metrics.py b/site_scons/site_tools/build_metrics/per_action_metrics.py
index 9c7c1ca902e..c0d9d8fb806 100644
--- a/site_scons/site_tools/build_metrics/per_action_metrics.py
+++ b/site_scons/site_tools/build_metrics/per_action_metrics.py
@@ -124,6 +124,9 @@ class PerActionMetrics(BuildMetricsCollector):
 
         SCons.Action.FunctionAction.execute = build_metrics_FunctionAction_execute
 
+    def get_name(self):
+        return "Per-Action Metrics"
+
     def get_mem_cpu(self, proc):
         with proc.oneshot():
             cpu = (proc.cpu_times().system + proc.cpu_times().user)
diff --git a/site_scons/site_tools/build_metrics/protocol.py b/site_scons/site_tools/build_metrics/protocol.py
index cc3ea17f64b..fe8c240f99c 100644
--- a/site_scons/site_tools/build_metrics/protocol.py
+++ b/site_scons/site_tools/build_metrics/protocol.py
@@ -1,9 +1,13 @@
-from typing import Tuple
+from typing import Tuple, Any
 from typing_extensions import Protocol
 from abc import abstractmethod
 
 
 class BuildMetricsCollector(Protocol):
     @abstractmethod
-    def finalize(self) -> Tuple[str, str]:
+    def finalize(self) -> Tuple[str, Any]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_name() -> str:
         raise NotImplementedError
```