diff options
author | Daishi Nakajima <nakaji.dayo@gmail.com> | 2020-08-30 19:06:36 +0900 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-09-08 15:42:41 -0400 |
commit | d7b2f799469a969ad7a2535be57f105186946c40 (patch) | |
tree | 115b3bcca9addbdf5f581ed7db21c5e7768e6c55 /testsuite/driver | |
parent | ea1cbb8f2ac9e077ed19530911c3a35c5f46ee8a (diff) | |
download | haskell-d7b2f799469a969ad7a2535be57f105186946c40.tar.gz |
testsuite: Output performance test results in tabular format
This was suggested in #18417.
Change the print format of the values.
* Shorten commit hash
* Reduce precision of the "Value" field
* Shorten metrics name
* e.g. runtime/bytes allocated -> run/alloc
* Shorten "MetricsChange"
* e.g. unchanged -> unch, increased -> incr
Also, print the baseline environment if any baselines were
measured in a different environment than the current one.
If all "Baseline commit" values are the same, print the value once.
Diffstat (limited to 'testsuite/driver')
-rw-r--r-- | testsuite/driver/perf_notes.py | 40 | ||||
-rw-r--r-- | testsuite/driver/runtests.py | 67 | ||||
-rw-r--r-- | testsuite/driver/testutil.py | 26 |
3 files changed, 98 insertions, 35 deletions
diff --git a/testsuite/driver/perf_notes.py b/testsuite/driver/perf_notes.py index 2bc88cd27c..fb4461ec81 100644 --- a/testsuite/driver/perf_notes.py +++ b/testsuite/driver/perf_notes.py @@ -22,7 +22,7 @@ import sys from collections import namedtuple from math import ceil, trunc -from testutil import passed, failBecause, testing_metrics +from testutil import passed, failBecause, testing_metrics, print_table from term_color import Color, colored from my_typing import * @@ -45,6 +45,14 @@ def inside_git_repo() -> bool: def is_worktree_dirty() -> bool: return subprocess.check_output(['git', 'status', '--porcelain']) != b'' +# Get length of abbreviated git commit hash +def get_abbrev_hash_length() -> int: + try: + return len(subprocess.check_output(['git', 'rev-parse', + '--short', 'HEAD']).strip()) + except subprocess.CalledProcessError: + return 10 + # # Some data access functions. At the moment this uses git notes. # @@ -100,6 +108,15 @@ class MetricChange(Enum): } return strings[self] + def short_name(self): + strings = { + MetricChange.NewMetric: "new", + MetricChange.NoChange: "unch", + MetricChange.Increase: "incr", + MetricChange.Decrease: "decr" + } + return strings[self] + AllowedPerfChange = NamedTuple('AllowedPerfChange', [('direction', MetricChange), ('metrics', List[str]), @@ -758,7 +775,7 @@ def main() -> None: exit(0) # - # String utilities for pretty-printing + # Print the data in tablular format # # T1234 T1234 @@ -770,11 +787,12 @@ def main() -> None: # HEAD~1 10023 10023 # HEAD~2 21234 21234 # HEAD~3 20000 20000 - - # Data is already in colum major format, so do that, calculate column widths - # then transpose and print each row. def strMetric(x): return '{:.2f}'.format(x.value) if x != None else "" + # Data is in colum major format, so transpose and pass to print_table. 
+ T = TypeVar('T') + def transpose(xss: List[List[T]]) -> List[List[T]]: + return list(map(list, zip(*xss))) headerCols = [ ["","","","Commit"] ] \ + [ [name, metric, way, env] for (env, name, metric, way) in testSeries ] @@ -782,17 +800,7 @@ def main() -> None: + [ [strMetric(get_commit_metric(ref, commit, env, name, metric, way)) \ for commit in commits ] \ for (env, name, metric, way) in testSeries ] - colWidths = [max([2+len(cell) for cell in colH + colD]) for (colH,colD) in zip(headerCols, dataCols)] - col_fmts = ['{:>' + str(w) + '}' for w in colWidths] - - def printCols(cols): - for row in zip(*cols): - # print(list(zip(col_fmts, row))) - print(''.join([f.format(cell) for (f,cell) in zip(col_fmts, row)])) - - printCols(headerCols) - print('-'*(sum(colWidths)+2)) - printCols(dataCols) + print_table(transpose(headerCols), transpose(dataCols)) if __name__ == '__main__': main() diff --git a/testsuite/driver/runtests.py b/testsuite/driver/runtests.py index a7e689df3b..d9a0bf6b59 100644 --- a/testsuite/driver/runtests.py +++ b/testsuite/driver/runtests.py @@ -23,11 +23,11 @@ import traceback # So we import it here first, so that the testsuite doesn't appear to fail. 
import subprocess -from testutil import getStdout, Watcher, str_warn, str_info +from testutil import getStdout, Watcher, str_warn, str_info, print_table, shorten_metric_name from testglobals import getConfig, ghc_env, getTestRun, TestConfig, \ TestOptions, brokens, PerfMetric from my_typing import TestName -from perf_notes import MetricChange, GitRef, inside_git_repo, is_worktree_dirty, format_perf_stat +from perf_notes import MetricChange, GitRef, inside_git_repo, is_worktree_dirty, format_perf_stat, get_abbrev_hash_length, is_commit_hash from junit import junit import term_color from term_color import Color, colored @@ -341,23 +341,52 @@ def cleanup_and_exit(exitcode): exit(exitcode) def tabulate_metrics(metrics: List[PerfMetric]) -> None: - for metric in sorted(metrics, key=lambda m: (m.stat.test, m.stat.way, m.stat.metric)): - print("{test:24} {metric:40} {value:15.3f}".format( - test = "{}({})".format(metric.stat.test, metric.stat.way), - metric = metric.stat.metric, - value = metric.stat.value - )) - if metric.baseline is not None: - val0 = metric.baseline.perfStat.value - val1 = metric.stat.value - rel = 100 * (val1 - val0) / val0 - print("{space:24} {herald:40} {value:15.3f} [{direction}, {rel:2.1f}%]".format( - space = "", - herald = "(baseline @ {commit})".format( - commit = metric.baseline.commit), - value = val0, - direction = metric.change, - rel = rel + abbrevLen = get_abbrev_hash_length() + hasBaseline = any([x.baseline is not None for x in metrics]) + baselineCommitSet = set([x.baseline.commit for x in metrics if x.baseline is not None]) + hideBaselineCommit = not hasBaseline or len(baselineCommitSet) == 1 + hideBaselineEnv = not hasBaseline or all( + [x.stat.test_env == x.baseline.perfStat.test_env + for x in metrics if x.baseline is not None]) + def row(cells: Tuple[str, str, str, str, str, str, str]) -> List[str]: + return [x for (idx, x) in enumerate(list(cells)) if + (idx != 2 or not hideBaselineCommit) and + (idx != 3 or not hideBaselineEnv )] 
+ + headerRows = [ + row(("", "", "Baseline", "Baseline", "Baseline", "", "")), + row(("Test", "Metric", "commit", "environment", "value", "New value", "Change")) + ] + def strDiff(x: PerfMetric) -> str: + if x.baseline is None: + return "" + val0 = x.baseline.perfStat.value + val1 = x.stat.value + return "{}({:+2.1f}%)".format(x.change.short_name(), 100 * (val1 - val0) / val0) + dataRows = [row(( + "{}({})".format(x.stat.test, x.stat.way), + shorten_metric_name(x.stat.metric), + "{}".format(x.baseline.commit[:abbrevLen] + if is_commit_hash(x.baseline.commit) else x.baseline.commit) + if x.baseline is not None else "", + "{}".format(x.baseline.perfStat.test_env) + if x.baseline is not None else "", + "{:13.1f}".format(x.baseline.perfStat.value) + if x.baseline is not None else "", + "{:13.1f}".format(x.stat.value), + strDiff(x) + )) for x in sorted(metrics, key = + lambda m: (m.stat.test, m.stat.way, m.stat.metric))] + print_table(headerRows, dataRows, 1) + print("") + if hasBaseline: + if hideBaselineEnv: + print("* All baselines were measured in the same environment as this test run") + if hideBaselineCommit: + commit = next(iter(baselineCommitSet)) + print("* All baseline commits are {}".format( + commit[:abbrevLen] + if is_commit_hash(commit) else commit )) # First collect all the tests to be run diff --git a/testsuite/driver/testutil.py b/testsuite/driver/testutil.py index 531d973e61..074214639e 100644 --- a/testsuite/driver/testutil.py +++ b/testsuite/driver/testutil.py @@ -144,3 +144,29 @@ def memoize(f): cached._cache = None return cached + +# Print the matrix data in a tabular format. +def print_table(header_rows: List[List[str]], data_rows: List[List[str]], padding=2) -> None: + # Calculate column widths then print each row. 
+ colWidths = [(0 if idx == 0 else padding) + max([len(cell) for cell in col]) + for (idx, col) in enumerate(zip(*(header_rows + data_rows)))] + col_fmts = ['{:>' + str(w) + '}' for w in colWidths] + + def printCols(cols): + for row in cols: + print(''.join([f.format(cell) for (f,cell) in zip(col_fmts, row)])) + + printCols(header_rows) + print('-' * sum(colWidths)) + printCols(data_rows) + +def shorten_metric_name(name: str) -> str: + dic = { + "runtime/bytes allocated": "run/alloc", + "runtime/peak_megabytes_allocated": "run/peak", + "runtime/max_bytes_used": "run/max", + "compile_time/bytes allocated": "ghc/alloc", + "compile_time/peak_megabytes_allocated": "ghc/peak", + "compile_time/max_bytes_used": "ghc/max", + } + return dic.get(name, name) |