author     Mikhail Shchatko <mikhail.shchatko@mongodb.com>    2022-03-28 08:46:35 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2022-03-28 09:14:20 +0000
commit     f2adf371c8668950d02c26ba7ea38183cd0af473 (patch)
tree       a9a8aae2d70a1020186e4b638dea0565a3db3e1d /buildscripts
parent     df35160198cec8f1ab2f66f9c286e0ac377146c3 (diff)
download   mongo-f2adf371c8668950d02c26ba7ea38183cd0af473.tar.gz
SERVER-64072 Upload compile/build times to Signal Processing
Diffstat (limited to 'buildscripts')
-rw-r--r--  buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py   49
-rw-r--r--  buildscripts/scons_metrics/__init__.py                                 1
-rw-r--r--  buildscripts/scons_metrics/metrics.py                                283
-rw-r--r--  buildscripts/scons_metrics/report.py                                  51
-rw-r--r--  buildscripts/util/cedar_report.py                                     38
5 files changed, 380 insertions, 42 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py
index 2165cb35557..af7950a7b93 100644
--- a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py
+++ b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py
@@ -3,47 +3,12 @@
import collections
import datetime
import json
-from dataclasses import dataclass
-from typing import Union, List, Dict, Any
+from typing import List, Dict, Any
from buildscripts.resmokelib import config as _config
from buildscripts.resmokelib.errors import CedarReportError
from buildscripts.resmokelib.testing.hooks import interface
-
-
-@dataclass
-class _CedarMetric:
- """Structure that holds metrics for Cedar."""
-
- name: str
- type: str
- value: Union[int, float]
- user_submitted: bool = False
-
- def as_dict(self) -> dict:
- """Return dictionary representation."""
- return {
- "name": self.name,
- "type": self.type,
- "value": self.value,
- "user_submitted": self.user_submitted,
- }
-
-
-@dataclass
-class _CedarTestReport:
- """Structure that holds test report for Cedar."""
-
- test_name: str
- thread_level: int
- metrics: List[_CedarMetric]
-
- def as_dict(self) -> dict:
- """Return dictionary representation."""
- return {
- "info": {"test_name": self.test_name, "args": {"thread_level": self.thread_level, }},
- "metrics": [metric.as_dict() for metric in self.metrics],
- }
+from buildscripts.util.cedar_report import CedarMetric, CedarTestReport
class CombineBenchmarkResults(interface.Hook):
@@ -140,8 +105,8 @@ class CombineBenchmarkResults(interface.Hook):
raise CedarReportError(msg)
for threads_count, thread_metrics in cedar_metrics.items():
- test_report = _CedarTestReport(test_name=name, thread_level=threads_count,
- metrics=thread_metrics)
+ test_report = CedarTestReport(test_name=name, thread_level=threads_count,
+ metrics=thread_metrics)
cedar_report.append(test_report.as_dict())
return cedar_report
@@ -275,7 +240,7 @@ class _BenchmarkThreadsReport(object):
return res
- def generate_cedar_metrics(self) -> Dict[int, List[_CedarMetric]]:
+ def generate_cedar_metrics(self) -> Dict[int, List[CedarMetric]]:
"""Generate metrics for Cedar."""
res = {}
@@ -292,7 +257,7 @@ class _BenchmarkThreadsReport(object):
metric_type = self.BENCHMARK_TO_CEDAR_METRIC_TYPE_MAP[aggregate_name]
- metric = _CedarMetric(name=metric_name, type=metric_type, value=report["cpu_time"])
+ metric = CedarMetric(name=metric_name, type=metric_type, value=report["cpu_time"])
threads = report["threads"]
if threads in res:
res[threads].append(metric)
@@ -302,7 +267,7 @@ class _BenchmarkThreadsReport(object):
return res
@staticmethod
- def check_dup_metric_names(metrics: List[_CedarMetric]) -> bool:
+ def check_dup_metric_names(metrics: List[CedarMetric]) -> bool:
"""Check duplicated metric names for Cedar."""
names = []
for metric in metrics:
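For orientation: the hook now builds its per-test reports from the shared dataclasses, grouping CedarMetric objects by thread level and turning each group into one CedarTestReport, as the hunks above show. A minimal sketch of that pattern, with a made-up benchmark name, values, and a placeholder type string (the real type strings come from BENCHMARK_TO_CEDAR_METRIC_TYPE_MAP):

    # Minimal sketch, not part of the commit: one Cedar test report dict per
    # thread level. The name, values, and the "MEAN" type string are made up.
    from typing import Dict, List

    from buildscripts.util.cedar_report import CedarMetric, CedarTestReport

    cedar_metrics: Dict[int, List[CedarMetric]] = {
        1: [CedarMetric(name="mean", type="MEAN", value=152.7)],
        8: [CedarMetric(name="mean", type="MEAN", value=37.4)],
    }

    cedar_report = []
    for threads_count, thread_metrics in cedar_metrics.items():
        cedar_report.append(
            CedarTestReport(test_name="BM_Example", thread_level=threads_count,
                            metrics=thread_metrics).as_dict())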
diff --git a/buildscripts/scons_metrics/__init__.py b/buildscripts/scons_metrics/__init__.py
new file mode 100644
index 00000000000..4b7a2bb941b
--- /dev/null
+++ b/buildscripts/scons_metrics/__init__.py
@@ -0,0 +1 @@
+"""Empty."""
diff --git a/buildscripts/scons_metrics/metrics.py b/buildscripts/scons_metrics/metrics.py
new file mode 100644
index 00000000000..1ba1f7825ad
--- /dev/null
+++ b/buildscripts/scons_metrics/metrics.py
@@ -0,0 +1,283 @@
+"""SCons metrics."""
+import re
+from typing import Optional, NamedTuple, List, Pattern, AnyStr
+
+from buildscripts.util.cedar_report import CedarMetric, CedarTestReport
+
+SCONS_METRICS_REGEX = re.compile(r"scons: done building targets\.((\n.*)*)", re.MULTILINE)
+
+MEMORY_BEFORE_READING_SCONSCRIPT_FILES_REGEX = re.compile(
+ r"Memory before reading SConscript files:(.+)")
+MEMORY_AFTER_READING_SCONSCRIPT_FILES_REGEX = re.compile(
+ r"Memory after reading SConscript files:(.+)")
+MEMORY_BEFORE_BUILDING_TARGETS_REGEX = re.compile(r"Memory before building targets:(.+)")
+MEMORY_AFTER_BUILDING_TARGETS_REGEX = re.compile(r"Memory after building targets:(.+)")
+OBJECT_COUNTS_REGEX = re.compile(r"Object counts:(\n.*)+Class\n(^[^:]+$)", re.MULTILINE)
+TOTAL_BUILD_TIME_REGEX = re.compile(r"Total build time:(.+)seconds")
+TOTAL_SCONSCRIPT_FILE_EXECUTION_TIME_REGEX = re.compile(
+ r"Total SConscript file execution time:(.+)seconds")
+TOTAL_SCONS_EXECUTION_TIME_REGEX = re.compile(r"Total SCons execution time:(.+)seconds")
+TOTAL_COMMAND_EXECUTION_TIME_REGEX = re.compile(r"Total command execution time:(.+)seconds")
+
+CACHE_HIT_RATIO_REGEX = re.compile(r"(?s)\.*hit rate: (\d+\.\d+)%(?!.*hit rate: (\d+\.\d+)%)")
+
+DEFAULT_CEDAR_METRIC_TYPE = "THROUGHPUT"
+
+
+class ObjectCountsMetric(NamedTuple):
+ """Class representing Object counts metric."""
+
+ class_: Optional[str]
+ pre_read: Optional[int]
+ post_read: Optional[int]
+ pre_build: Optional[int]
+ post_build: Optional[int]
+
+ def as_cedar_report(self) -> CedarTestReport:
+ """Return cedar report representation."""
+ metrics = [
+ CedarMetric(
+ name="pre-read object count",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.pre_read,
+ ),
+ CedarMetric(
+ name="post-read object count",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.post_read,
+ ),
+ CedarMetric(
+ name="pre-build object count",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.pre_build,
+ ),
+ CedarMetric(
+ name="post-build object count",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.post_build,
+ ),
+ ]
+
+ return CedarTestReport(
+ test_name=f"{self.class_} class",
+ thread_level=1,
+ metrics=metrics,
+ )
+
+
+class SconsMetrics: # pylint: disable=too-many-instance-attributes
+ """Class representing SCons metrics."""
+
+ memory_before_reading_sconscript_files: Optional[int] = None
+ memory_after_reading_sconscript_files: Optional[int] = None
+ memory_before_building_targets: Optional[int] = None
+ memory_after_building_targets: Optional[int] = None
+ object_counts: List[ObjectCountsMetric] = None
+ total_build_time: Optional[float] = None
+ total_sconscript_file_execution_time: Optional[float] = None
+ total_scons_execution_time: Optional[float] = None
+ total_command_execution_time: Optional[float] = None
+ final_cache_hit_ratio: Optional[float] = None
+
+ def __init__(self, stdout_log_file, cache_debug_log_file):
+ """Init."""
+ with open(stdout_log_file, "r") as fh:
+ res = SCONS_METRICS_REGEX.search(fh.read())
+ self.raw_report = res.group(1).strip() if res else ""
+
+ if self.raw_report:
+ self.memory_before_reading_sconscript_files = self._parse_int(
+ MEMORY_BEFORE_READING_SCONSCRIPT_FILES_REGEX, self.raw_report)
+ self.memory_after_reading_sconscript_files = self._parse_int(
+ MEMORY_AFTER_READING_SCONSCRIPT_FILES_REGEX, self.raw_report)
+ self.memory_before_building_targets = self._parse_int(
+ MEMORY_BEFORE_BUILDING_TARGETS_REGEX, self.raw_report)
+ self.memory_after_building_targets = self._parse_int(
+ MEMORY_AFTER_BUILDING_TARGETS_REGEX, self.raw_report)
+
+ self.object_counts = self._parse_object_counts(OBJECT_COUNTS_REGEX, self.raw_report)
+
+ self.total_build_time = self._parse_float(TOTAL_BUILD_TIME_REGEX, self.raw_report)
+ self.total_sconscript_file_execution_time = self._parse_float(
+ TOTAL_SCONSCRIPT_FILE_EXECUTION_TIME_REGEX, self.raw_report)
+ self.total_scons_execution_time = self._parse_float(TOTAL_SCONS_EXECUTION_TIME_REGEX,
+ self.raw_report)
+ self.total_command_execution_time = self._parse_float(
+ TOTAL_COMMAND_EXECUTION_TIME_REGEX, self.raw_report)
+
+ with open(cache_debug_log_file, "r") as fh:
+ self.final_cache_hit_ratio = self._parse_float(CACHE_HIT_RATIO_REGEX, fh.read())
+
+ def make_cedar_report(self) -> List[dict]:
+ """Format the data to look like a cedar report json."""
+ cedar_report = []
+ if not self.raw_report:
+ return cedar_report
+
+ if self.memory_before_reading_sconscript_files:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Memory before reading SConscript files",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="bytes",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.memory_before_reading_sconscript_files,
+ )
+ ],
+ ).as_dict())
+
+ if self.memory_after_reading_sconscript_files:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Memory after reading SConscript files",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="bytes",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.memory_after_reading_sconscript_files,
+ )
+ ],
+ ).as_dict())
+
+ if self.memory_before_building_targets:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Memory before building targets",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="bytes",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.memory_before_building_targets,
+ )
+ ],
+ ).as_dict())
+
+ if self.memory_after_building_targets:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Memory after building targets",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="bytes",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.memory_after_building_targets,
+ )
+ ],
+ ).as_dict())
+
+ if self.total_build_time:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Total build time",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="seconds",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.total_build_time,
+ )
+ ],
+ ).as_dict())
+
+ if self.total_sconscript_file_execution_time:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Total SConscript file execution time",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="seconds",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.total_sconscript_file_execution_time,
+ )
+ ],
+ ).as_dict())
+
+ if self.total_scons_execution_time:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Total SCons execution time",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="seconds",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.total_scons_execution_time,
+ )
+ ],
+ ).as_dict())
+
+ if self.total_command_execution_time:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Total command execution time",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="seconds",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.total_command_execution_time,
+ )
+ ],
+ ).as_dict())
+
+ if self.object_counts:
+ for obj_counts in self.object_counts:
+ cedar_report.append(obj_counts.as_cedar_report().as_dict())
+
+ if self.final_cache_hit_ratio:
+ cedar_report.append(
+ CedarTestReport(
+ test_name="Final cache hit ratio",
+ thread_level=1,
+ metrics=[
+ CedarMetric(
+ name="percent",
+ type=DEFAULT_CEDAR_METRIC_TYPE,
+ value=self.final_cache_hit_ratio,
+ ),
+ ],
+ ).as_dict())
+
+ return cedar_report
+
+ @classmethod
+ def _parse_int(cls, regex: Pattern[AnyStr], raw_str: str) -> Optional[int]:
+ """Parse int value."""
+ res = regex.search(raw_str)
+ if res:
+ return int(res.group(1).strip())
+ return None
+
+ @classmethod
+ def _parse_float(cls, regex: Pattern[AnyStr], raw_str: str) -> Optional[float]:
+ """Parse float value."""
+ res = regex.search(raw_str)
+ if res:
+ return float(res.group(1).strip())
+ return None
+
+ @classmethod
+ def _parse_object_counts(cls, regex: Pattern[AnyStr], raw_str: str) -> List[ObjectCountsMetric]:
+ """Parse object counts metrics."""
+ object_counts = []
+ res = regex.search(raw_str)
+ if res:
+ object_counts_raw = res.group(2)
+ for line in object_counts_raw.splitlines():
+ line_split = line.split()
+ if len(line_split) == 5:
+ object_counts.append(
+ ObjectCountsMetric(
+ class_=line_split[4],
+ pre_read=int(line_split[0]),
+ post_read=int(line_split[1]),
+ pre_build=int(line_split[2]),
+ post_build=int(line_split[3]),
+ ))
+ return object_counts
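As a quick sanity check on the parsing, here is a hedged sketch that runs two of the regexes above against a hand-written approximation of the SCons trailer. The sample text and numbers are invented and real SCons debug output may differ slightly; it assumes the repository root is on sys.path so the new module can be imported.

    # Hedged sketch, not part of the commit: exercise two of the regexes above
    # against an invented SCons trailer. Run from the repository root so that
    # buildscripts.scons_metrics is importable.
    from buildscripts.scons_metrics.metrics import (
        MEMORY_BEFORE_READING_SCONSCRIPT_FILES_REGEX,
        TOTAL_BUILD_TIME_REGEX,
    )

    SAMPLE_TRAILER = (
        "scons: done building targets.\n"
        "Memory before reading SConscript files: 48738304\n"
        "Total build time: 5834.123456 seconds\n"
    )

    mem = MEMORY_BEFORE_READING_SCONSCRIPT_FILES_REGEX.search(SAMPLE_TRAILER)
    assert mem and int(mem.group(1).strip()) == 48738304

    build_time = TOTAL_BUILD_TIME_REGEX.search(SAMPLE_TRAILER)
    assert build_time and float(build_time.group(1).strip()) == 5834.123456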
diff --git a/buildscripts/scons_metrics/report.py b/buildscripts/scons_metrics/report.py
new file mode 100644
index 00000000000..61b60ccc186
--- /dev/null
+++ b/buildscripts/scons_metrics/report.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+"""Make SCons metrics cedar report."""
+import json
+import os.path
+import sys
+
+import click
+
+from buildscripts.scons_metrics.metrics import SconsMetrics
+
+SCONS_STDOUT_LOG = "scons_stdout.log"
+SCONS_CACHE_DEBUG_LOG = "scons_cache.log"
+CEDAR_REPORT_FILE = "scons_cedar_report.json"
+
+
+@click.command()
+@click.option("--scons-stdout-log-file", default=SCONS_STDOUT_LOG, type=str,
+ help="Path to the file with SCons stdout logs.")
+@click.option("--scons-cache-debug-log-file", default=SCONS_CACHE_DEBUG_LOG, type=str,
+ help="Path to the file with SCons stdout logs.")
+@click.option("--cedar-report-file", default=CEDAR_REPORT_FILE, type=str,
+ help="Path to cedar report json file.")
+def main(scons_stdout_log_file: str, scons_cache_debug_log_file: str,
+ cedar_report_file: str) -> None:
+ """Read SCons stdout log file and write cedar report json file."""
+ scons_stdout_log_file = os.path.abspath(scons_stdout_log_file)
+ scons_cache_debug_log_file = os.path.abspath(scons_cache_debug_log_file)
+ cedar_report_file = os.path.abspath(cedar_report_file)
+
+ if not os.path.exists(scons_stdout_log_file):
+ print(f"Could not find SCons stdout log file '{scons_stdout_log_file}'.")
+ sys.exit(1)
+
+ if not os.path.exists(scons_cache_debug_log_file):
+ print(f"Could not find SCons cache debug log file '{scons_cache_debug_log_file}'.")
+ sys.exit(1)
+
+ scons_metrics = SconsMetrics(scons_stdout_log_file, scons_cache_debug_log_file)
+ if not scons_metrics.raw_report:
+ print(
+ f"Could not find raw metrics data in SCons stdout log file '{scons_stdout_log_file}'.")
+ sys.exit(1)
+
+ cedar_report = scons_metrics.make_cedar_report()
+ with open(cedar_report_file, "w") as fh:
+ json.dump(cedar_report, fh)
+ print(f"Done dumping cedar report json to file '{cedar_report_file}'.")
+
+
+if __name__ == '__main__':
+ main() # pylint: disable=no-value-for-parameter
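A hedged usage sketch of the new entry point, assuming the two log files already sit at the script's default locations; this is roughly the programmatic equivalent of what main() does:

    # Minimal sketch, not part of the commit: parse the SCons logs and dump the
    # cedar report json, mirroring report.main(). Assumes scons_stdout.log and
    # scons_cache.log exist in the current directory.
    import json

    from buildscripts.scons_metrics.metrics import SconsMetrics

    scons_metrics = SconsMetrics("scons_stdout.log", "scons_cache.log")
    if scons_metrics.raw_report:
        with open("scons_cedar_report.json", "w") as fh:
            json.dump(scons_metrics.make_cedar_report(), fh)

From the command line (run from the repository root so the buildscripts package is importable), the equivalent invocation is roughly: python buildscripts/scons_metrics/report.py --scons-stdout-log-file scons_stdout.log --scons-cache-debug-log-file scons_cache.log --cedar-report-file scons_cedar_report.json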
diff --git a/buildscripts/util/cedar_report.py b/buildscripts/util/cedar_report.py
new file mode 100644
index 00000000000..ccdd26a687a
--- /dev/null
+++ b/buildscripts/util/cedar_report.py
@@ -0,0 +1,38 @@
+"""Cedar report."""
+from dataclasses import dataclass
+from typing import Union, List
+
+
+@dataclass
+class CedarMetric:
+ """Structure that holds metrics for Cedar."""
+
+ name: str
+ type: str
+ value: Union[int, float]
+ user_submitted: bool = False
+
+ def as_dict(self) -> dict:
+ """Return dictionary representation."""
+ return {
+ "name": self.name,
+ "type": self.type,
+ "value": self.value,
+ "user_submitted": self.user_submitted,
+ }
+
+
+@dataclass
+class CedarTestReport:
+ """Structure that holds test report for Cedar."""
+
+ test_name: str
+ thread_level: int
+ metrics: List[CedarMetric]
+
+ def as_dict(self) -> dict:
+ """Return dictionary representation."""
+ return {
+ "info": {"test_name": self.test_name, "args": {"thread_level": self.thread_level, }},
+ "metrics": [metric.as_dict() for metric in self.metrics],
+ }