1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
"""Utility to support parsing a TestStat."""
from collections import defaultdict
from collections import namedtuple
from typing import NamedTuple, List
import requests
from requests.adapters import HTTPAdapter, Retry
import buildscripts.util.testname as testname # pylint: disable=wrong-import-position
TESTS_STATS_S3_LOCATION = "https://mongo-test-stats.s3.amazonaws.com"
class HistoricalTestInformation(NamedTuple):
"""
Container for information about the historical runtime of a test.
test_name: Name of test.
avg_duration_pass: Average of runtime of test that passed.
num_pass: Number of times the test has passed.
num_fail: Number of times the test has failed.
"""
test_name: str
num_pass: int
num_fail: int
avg_duration_pass: float
TestRuntime = namedtuple('TestRuntime', ['test_name', 'runtime'])
def normalize_test_name(test_name):
"""Normalize test names that may have been run on windows or unix."""
return test_name.replace("\\", "/")
class TestStats(object):
"""Represent the test statistics for the task that is being analyzed."""
def __init__(self, evg_test_stats_results: List[HistoricalTestInformation]) -> None:
"""Initialize the TestStats with raw results from the Evergreen API."""
# Mapping from test_file to {"num_run": X, "duration": Y} for tests
self._runtime_by_test = defaultdict(dict)
# Mapping from 'test_name:hook_name' to
# {'test_name': {hook_name': {"num_run": X, "duration": Y}}}
self._hook_runtime_by_test = defaultdict(lambda: defaultdict(dict))
for doc in evg_test_stats_results:
self._add_stats(doc)
def _add_stats(self, test_stats: HistoricalTestInformation) -> None:
"""Add the statistics found in a document returned by the Evergreen test_stats/ endpoint."""
test_file = testname.normalize_test_file(test_stats.test_name)
duration = test_stats.avg_duration_pass
num_run = test_stats.num_pass
is_hook = testname.is_resmoke_hook(test_file)
if is_hook:
self._add_test_hook_stats(test_file, duration, num_run)
else:
self._add_test_stats(test_file, duration, num_run)
def _add_test_stats(self, test_file, duration, num_run):
"""Add the statistics for a test."""
runtime_info = self._runtime_by_test[test_file]
self._add_runtime_info(runtime_info, duration, num_run)
def _add_test_hook_stats(self, test_file, duration, num_run):
"""Add the statistics for a hook."""
test_name, hook_name = testname.split_test_hook_name(test_file)
runtime_info = self._hook_runtime_by_test[test_name][hook_name]
self._add_runtime_info(runtime_info, duration, num_run)
@staticmethod
def _add_runtime_info(runtime_info, duration, num_run):
if not runtime_info:
runtime_info["duration"] = duration
runtime_info["num_run"] = num_run
else:
runtime_info["duration"] = TestStats._average(
runtime_info["duration"], runtime_info["num_run"], duration, num_run)
runtime_info["num_run"] += num_run
@staticmethod
def _average(value_a, num_a, value_b, num_b):
"""Compute a weighted average of 2 values with associated numbers."""
divisor = num_a + num_b
if divisor == 0:
return 0
else:
return float(value_a * num_a + value_b * num_b) / divisor
def get_tests_runtimes(self):
"""Return the list of (test_file, runtime_in_secs) tuples ordered by decreasing runtime."""
tests = []
for test_file, runtime_info in list(self._runtime_by_test.items()):
duration = runtime_info["duration"]
test_name = testname.get_short_name_from_test_file(test_file)
for _, hook_runtime_info in self._hook_runtime_by_test[test_name].items():
duration += hook_runtime_info["duration"]
test = TestRuntime(test_name=normalize_test_name(test_file), runtime=duration)
tests.append(test)
return sorted(tests, key=lambda x: x.runtime, reverse=True)
def get_stats_from_s3(project: str, task: str, variant: str) -> List[HistoricalTestInformation]:
"""
Retrieve test stats from s3 for a given task.
:param project: Project to query.
:param task: Task to query.
:param variant: Build variant to query.
:return: A list of the Test stats for the specified task.
"""
session = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
session.mount('https://', HTTPAdapter(max_retries=retries))
response = session.get(f"{TESTS_STATS_S3_LOCATION}/{project}/{variant}/{task}")
data = response.json()
return [HistoricalTestInformation(**item) for item in data]
|