diff options
author | Robert Guo <robert.guo@10gen.com> | 2018-03-28 20:25:07 -0400 |
---|---|---|
committer | Robert Guo <robert.guo@10gen.com> | 2018-04-03 10:15:45 -0400 |
commit | 7a502a751e05af610619445367f1b5708a3d5491 (patch) | |
tree | 2515ef52bce01b9067460aaf85d0b91a7f7775b0 /buildscripts | |
parent | 6b8b6e0517dc4c06f4224b3644c55c586b347677 (diff) | |
download | mongo-7a502a751e05af610619445367f1b5708a3d5491.tar.gz |
SERVER-34210 fix display of benchmarks with multiple arguments
Diffstat (limited to 'buildscripts')
-rw-r--r-- | buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py | 86 | ||||
-rwxr-xr-x | buildscripts/tests/resmokelib/testing/hooks/test_combine_benchmark_results.py | 90 |
2 files changed, 111 insertions, 65 deletions
diff --git a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py index 7f1bea31cf3..5df43363330 100644 --- a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py +++ b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py @@ -68,8 +68,9 @@ class CombineBenchmarkResults(interface.Hook): for name, report in self.benchmark_reports.items(): test_report = { - "name": name, "results": report.generate_perf_plugin_dict(), - "context": report.context._asdict() + "name": name, + "context": report.context._asdict(), + "results": report.generate_perf_plugin_dict() } perf_report["results"].append(test_report) @@ -80,19 +81,23 @@ class CombineBenchmarkResults(interface.Hook): context = report_dict["context"] for benchmark_res in report_dict["benchmarks"]: - # The group name is the benchmark name minus the thread count and any statistics. - # Each group will be shown on a single perf graph. - group_name = benchmark_res["name"].rsplit("/", 1)[0] + bm_name_obj = _BenchmarkThreadsReport.parse_bm_name(benchmark_res["name"]) - if group_name == benchmark_res["name"] and group_name.count("_") == 2: - # When running with only one thread, the thread count is not in the name; - # just remove the mean/median/stddev suffix in this case. - # With one thread, the group_name looks like: BM_MyTestNameInCamelCase_statistic. - group_name = group_name.rsplit("_", 1)[0] + # Don't show Benchmark's included statistics to prevent cluttering up the graph. 
+ if bm_name_obj.statistic_type is not None: + continue + + if bm_name_obj.base_name not in self.benchmark_reports: + self.benchmark_reports[bm_name_obj.base_name] = _BenchmarkThreadsReport(context) + self.benchmark_reports[bm_name_obj.base_name].add_report(bm_name_obj, benchmark_res) - if group_name not in self.benchmark_reports: - self.benchmark_reports[group_name] = _BenchmarkThreadsReport(context) - self.benchmark_reports[group_name].add_report(benchmark_res) + +# Capture information from a Benchmark name in a logical format. +_BenchmarkName = collections.namedtuple("_BenchmarkName", [ + "base_name", + "thread_count", + "statistic_type" +]); class _BenchmarkThreadsReport(object): @@ -133,9 +138,8 @@ class _BenchmarkThreadsReport(object): # list of benchmark runs for each thread. self.thread_benchmark_map = collections.defaultdict(list) - def add_report(self, report): - thread_count = self._thread_from_name(report["name"]) - self.thread_benchmark_map[thread_count].append(report) + def add_report(self, bm_name_obj, report): + self.thread_benchmark_map[bm_name_obj.thread_count].append(report) def generate_perf_plugin_dict(self): """ @@ -158,12 +162,6 @@ class _BenchmarkThreadsReport(object): res = {} for thread_count, reports in self.thread_benchmark_map.items(): - if (thread_count.endswith("median") or thread_count.endswith("mean") - or thread_count.endswith("stddev")): - # We don't use Benchmark's included statistics for now because they clutter up the - # graph. 
- continue - thread_report = { "error_values": [0 for _ in range(len(reports))], "ops_per_sec_values": [] # This is actually storing latency per op, not ops/s @@ -179,16 +177,36 @@ class _BenchmarkThreadsReport(object): return res @staticmethod - def _thread_from_name(name): - # Get the thread from a string: - # "BM_SetInsert/arg name:1024/threads:10_mean" -> "10_mean" - # "BM_SetInsert" -> "1" - # "BM_SetInsert_mean" -> "1_mean" - thread_section = name.rsplit("/", 1)[-1] + def parse_bm_name(name_str): + """ + Split the benchmark name into base_name, thread_count and statistic_type. + + The base name is the benchmark name minus the thread count and any statistics. + Testcases of the same group will be shown on a single perf graph. + + name_str look like the following: + "BM_SetInsert/arg name:1024/threads:10_mean" + "BM_SetInsert/arg 1/arg 2" + "BM_SetInsert_mean" + """ + + base_name = None + thread_count = None + statistic_type = None + + # Step 1: get the statistic type. + if name_str.count("_") == 2: # There is statistics. + statistic_type = name_str.rsplit("_", 1)[-1] + # Remove the statistic type suffix from the name. + name_str = name_str[:-len(statistic_type) - 1] + + # Step 2: Get the thread count and name. + thread_section = name_str.rsplit("/", 1)[-1] if thread_section.startswith("threads:"): - return thread_section.split(":")[-1] - else: - if name.count("_") == 2: - suffix = name.split("_")[-1] - return "1_" + suffix - return "1" + base_name = name_str.rsplit("/", 1)[0] + thread_count = thread_section.split(":")[-1] + else: # There is no explicit thread count, so the thread count is 1. 
+ thread_count = "1" + base_name = name_str + + return _BenchmarkName(base_name, thread_count, statistic_type) diff --git a/buildscripts/tests/resmokelib/testing/hooks/test_combine_benchmark_results.py b/buildscripts/tests/resmokelib/testing/hooks/test_combine_benchmark_results.py index ad2f331d816..34f3eded3b9 100755 --- a/buildscripts/tests/resmokelib/testing/hooks/test_combine_benchmark_results.py +++ b/buildscripts/tests/resmokelib/testing/hooks/test_combine_benchmark_results.py @@ -7,7 +7,6 @@ import unittest import mock -import buildscripts.resmokelib.config as _config import buildscripts.resmokelib.testing.hooks.combine_benchmark_results as cbr _BM_CONTEXT = { @@ -15,13 +14,19 @@ _BM_CONTEXT = { "cpu_scaling_enabled": False, "library_build_type": "debug" } -_BM_REPORT = { - "name": "BM_Name1", "iterations": 1000, "real_time": 1202, "cpu_time": 1303, +_BM_REPORT_1 = { + "name": "BM_Name1/arg1/arg with space", "iterations": 1000, "real_time": 1202, "cpu_time": 1303, "bytes_per_second": 1404, "items_per_second": 1505, "custom_counter_1": 1606 } +_BM_REPORT_2 = { + "name": "BM_Name1/arg1/arg with space", "iterations": 1000, "real_time": 1204, "cpu_time": 1305, + "bytes_per_second": 1406, "items_per_second": 1507, "custom_counter_1": 1608 +} + _BM_MEAN_REPORT = { - "name": "BM_Name1_mean", "iterations": 1000, "real_time": 1200, "cpu_time": 1300, + "name": "BM_Name1/arg1/arg with space_mean", "iterations": 1000, "real_time": 1200, + "cpu_time": 1300, "bytes_per_second": 1400, "items_per_second": 1500, "custom_counter_1": 1600 } @@ -36,10 +41,9 @@ _BM_MULTITHREAD_MEDIAN_REPORT = { } _BM_FULL_REPORT = { - "context": - _BM_CONTEXT, "benchmarks": [ - _BM_REPORT, _BM_MEAN_REPORT, _BM_MULTITHREAD_REPORT, _BM_MULTITHREAD_MEDIAN_REPORT - ] + "context": _BM_CONTEXT, + "benchmarks": [_BM_REPORT_1, _BM_REPORT_2, _BM_MEAN_REPORT, _BM_MULTITHREAD_REPORT, + _BM_MULTITHREAD_MEDIAN_REPORT] } # 12/31/2999 @ 11:59pm (UTC) @@ -72,6 +76,7 @@ class 
TestCombineBenchmarkResults(CombineBenchmarkResultsFixture): self.assertEqual(len(report["results"]), 2) self.assertDictEqual(report["results"][0]["context"], _BM_CONTEXT) + self.assertEqual(report["results"][0]["results"]["1"]["ops_per_sec"], -1304.0) self.assertEqual(report["start"], "2999-12-31T23:59:59Z") self.assertEqual(report["end"], "3000-01-01T00:00:00Z") @@ -79,29 +84,44 @@ class TestCombineBenchmarkResults(CombineBenchmarkResultsFixture): class TestBenchmarkThreadsReport(CombineBenchmarkResultsFixture): def test_thread_from_name(self): - thread = self.bm_threads_report._thread_from_name("BM_Name/arg name:100/threads:10") - self.assertEqual(thread, "10") - - thread = self.bm_threads_report._thread_from_name("BM_Name/arg name:100/threads:10_mean") - self.assertEqual(thread, "10_mean") - - thread = self.bm_threads_report._thread_from_name("BM_Name/threads:abcd") - self.assertEqual(thread, "abcd") - - thread = self.bm_threads_report._thread_from_name("BM_Name") - self.assertEqual(thread, "1") - - thread = self.bm_threads_report._thread_from_name("BM_Name_mean") - self.assertEqual(thread, "1_mean") - - thread = self.bm_threads_report._thread_from_name("BM_Name/arg name:100") - self.assertEqual(thread, "1") + name_obj = self.bm_threads_report.parse_bm_name("BM_Name/arg name:100/threads:10") + self.assertEqual(name_obj.thread_count, "10") + self.assertEqual(name_obj.statistic_type, None) + self.assertEqual(name_obj.base_name, "BM_Name/arg name:100") + + name_obj = self.bm_threads_report.parse_bm_name("BM_Name/arg name:100/threads:10_mean") + self.assertEqual(name_obj.thread_count, "10") + self.assertEqual(name_obj.statistic_type, "mean") + self.assertEqual(name_obj.base_name, "BM_Name/arg name:100") + + name_obj = self.bm_threads_report.parse_bm_name("BM_Name/threads:abcd") + self.assertEqual(name_obj.thread_count, "abcd") + self.assertEqual(name_obj.statistic_type, None) + self.assertEqual(name_obj.base_name, "BM_Name") + + name_obj = 
self.bm_threads_report.parse_bm_name("BM_Name") + self.assertEqual(name_obj.thread_count, "1") + self.assertEqual(name_obj.statistic_type, None) + self.assertEqual(name_obj.base_name, "BM_Name") + + name_obj = self.bm_threads_report.parse_bm_name("BM_Name/1/eeee_mean") + self.assertEqual(name_obj.thread_count, "1") + self.assertEqual(name_obj.statistic_type, "mean") + self.assertEqual(name_obj.base_name, "BM_Name/1/eeee") + + name_obj = self.bm_threads_report.parse_bm_name("BM_Name/arg name:100") + self.assertEqual(name_obj.thread_count, "1") + self.assertEqual(name_obj.statistic_type, None) + self.assertEqual(name_obj.base_name, "BM_Name/arg name:100") def test_generate_multithread_perf_plugin_dict(self): # Also test add_report() in the process. - self.bm_threads_report.add_report(_BM_MULTITHREAD_REPORT) - self.bm_threads_report.add_report(_BM_MULTITHREAD_MEDIAN_REPORT) - self.assertEqual(len(self.bm_threads_report.thread_benchmark_map.keys()), 2) + self.bm_threads_report.add_report( + self.bm_threads_report.parse_bm_name(_BM_MULTITHREAD_REPORT["name"]), + _BM_MULTITHREAD_REPORT + ) + + self.assertEqual(len(self.bm_threads_report.thread_benchmark_map.keys()), 1) report = self.bm_threads_report.generate_perf_plugin_dict() @@ -114,9 +134,17 @@ class TestBenchmarkThreadsReport(CombineBenchmarkResultsFixture): self.assertEqual(report["10"]["ops_per_sec"], -303.0) def test_generate_single_thread_perf_plugin_dict(self): - self.bm_threads_report.add_report(_BM_REPORT) - self.bm_threads_report.add_report(_BM_MEAN_REPORT) - self.assertEqual(len(self.bm_threads_report.thread_benchmark_map.keys()), 2) + self.bm_threads_report.add_report( + self.bm_threads_report.parse_bm_name(_BM_REPORT_1["name"]), + _BM_REPORT_1 + ) + + self.bm_threads_report.add_report( + self.bm_threads_report.parse_bm_name(_BM_REPORT_2["name"]), + _BM_REPORT_2 + ) + + self.assertEqual(len(self.bm_threads_report.thread_benchmark_map.keys()), 1) report = self.bm_threads_report.generate_perf_plugin_dict() |