-rwxr-xr-x  buildscripts/evergreen_gen_build_metrics_tasks.py               | 124
-rw-r--r--  etc/evergreen_yml_components/definitions.yml                     |  19
-rw-r--r--  evergreen/build_metric_cedar_report.py                           |   9
-rw-r--r--  site_scons/site_tools/build_metrics/combine_metrics.py           | 238
-rw-r--r--  site_scons/site_tools/build_metrics/combine_metrics_unittest.py  |  78
5 files changed, 424 insertions, 44 deletions
diff --git a/buildscripts/evergreen_gen_build_metrics_tasks.py b/buildscripts/evergreen_gen_build_metrics_tasks.py
index 63b2314a48f..5cba9c832ac 100755
--- a/buildscripts/evergreen_gen_build_metrics_tasks.py
+++ b/buildscripts/evergreen_gen_build_metrics_tasks.py
@@ -10,59 +10,90 @@ from shrub.v2.command import BuiltInCommand
def main():
tasks = {
- 'windows_tasks': [],
- 'linux_x86_64_tasks': [],
- 'linux_arm64_tasks': [],
- 'macos_tasks': [],
+ 'windows_tasks': {},
+ 'linux_x86_64_tasks': {},
+ 'linux_arm64_tasks': {},
+ 'macos_tasks': {},
}
- def create_build_metric_task_steps(task_build_flags, task_targets):
+ tasks_prefixes = {
+ 'windows_tasks': 'build_metrics_msvc',
+ 'linux_x86_64_tasks': 'build_metrics_x86_64',
+ 'linux_arm64_tasks': 'build_metrics_arm64',
+ 'macos_tasks': 'build_metrics_xcode',
+ }
+
+ task_group_targets = {
+ 'dynamic': [
+ "install-devcore",
+ "install-all-meta generate-libdeps-graph",
+ ],
+ "static": [
+ "install-devcore",
+ "install-all-meta-but-not-unittests",
+ ]
+ }
+
+ def create_build_metric_task_steps(task_build_flags, task_targets, split_num):
evg_flags = f"--debug=time,count,memory VARIANT_DIR=metrics BUILD_METRICS_EVG_TASK_ID={os.environ['task_id']} BUILD_METRICS_EVG_BUILD_VARIANT={os.environ['build_variant']}"
- cache_flags = "--cache-dir=$PWD/scons-cache --cache-signature-mode=validate"
+ cache_flags = f"--cache-dir=$PWD/scons-cache-{split_num} --cache-signature-mode=validate"
scons_task_steps = [
- f"{evg_flags} --build-metrics=build_metrics.json",
- f"{evg_flags} {cache_flags} --cache-populate --build-metrics=populate_cache.json",
+ f"{evg_flags} --build-metrics=build_metrics_{split_num}.json",
+ f"{evg_flags} {cache_flags} --cache-populate --build-metrics=populate_cache_{split_num}.json",
f"{evg_flags} --clean",
- f"{evg_flags} {cache_flags} --build-metrics=pull_cache.json",
+ f"{evg_flags} {cache_flags} --build-metrics=pull_cache_{split_num}.json",
]
task_steps = [
FunctionCall(
"scons compile", {
- "task_compile_flags": f"{task_build_flags} {step_flags}",
+ "patch_compile_flags": f"{task_build_flags} {step_flags}",
"targets": task_targets,
"compiling_for_test": "true",
}) for step_flags in scons_task_steps
]
- task_steps.append(FunctionCall("attach build metrics"))
- task_steps.append(FunctionCall("print top N metrics"))
return task_steps
+ def create_build_metric_task_list(task_list, link_model, build_flags):
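+ # Create one build task per target for this link model, plus a final split task that combines the metrics and reports them.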
+
+ tasks[task_list][link_model] = []
+ prefix = tasks_prefixes[task_list]
+ index = 0
+ for index, target in enumerate(task_group_targets[link_model]):
+ tasks[task_list][link_model].append(
+ Task(f"{prefix}_{link_model}_build_split_{index}_{target.replace(' ', '_')}",
+ create_build_metric_task_steps(build_flags, target, index)))
+ tasks[task_list][link_model].append(
+ Task(f"{prefix}_{link_model}_build_split_{index+1}_combine_metrics", [
+ FunctionCall("combine build metrics"),
+ FunctionCall("attach build metrics"),
+ FunctionCall("print top N metrics")
+ ]))
+
#############################
if sys.platform == 'win32':
- targets = "install-all-meta-but-not-unittests"
build_flags = '--cache=nolinked'
- tasks['windows_tasks'].append(
- Task("build_metrics_msvc", create_build_metric_task_steps(build_flags, targets)))
+ create_build_metric_task_list(
+ 'windows_tasks',
+ 'static',
+ build_flags,
+ )
##############################
elif sys.platform == 'darwin':
for link_model in ['dynamic', 'static']:
- if link_model == 'dynamic':
- targets = "install-all-meta generate-libdeps-graph"
- else:
- targets = "install-all-meta-but-not-unittests"
build_flags = f"--link-model={link_model} --force-macos-dynamic-link" + (
- ' --cache=nolinked' if link_model == 'static' else "")
+ ' --cache=nolinked' if link_model == 'static' else " --cache=all")
- tasks['macos_tasks'].append(
- Task(f"build_metrics_xcode_{link_model}",
- create_build_metric_task_steps(build_flags, targets)))
+ create_build_metric_task_list(
+ 'macos_tasks',
+ link_model,
+ build_flags,
+ )
##############################
else:
@@ -71,26 +102,29 @@ def main():
for compiler in ['gcc']:
for link_model in ['dynamic', 'static']:
- if link_model == 'dynamic':
- targets = "install-all-meta generate-libdeps-graph"
- else:
- targets = "install-all-meta-but-not-unittests"
+ build_flags = (
+ "BUILD_METRICS_BLOATY=/opt/mongodbtoolchain/v4/bin/bloaty " +
+ f"--variables-files=etc/scons/mongodbtoolchain_{toolchain}_{compiler}.vars "
+ + f"--link-model={link_model}" +
+ (' --cache=nolinked' if link_model == 'static' else " --cache=all"))
- build_flags = f"BUILD_METRICS_BLOATY=/opt/mongodbtoolchain/v4/bin/bloaty --variables-files=etc/scons/mongodbtoolchain_{toolchain}_{compiler}.vars --link-model={link_model}" + (
- ' --cache=nolinked' if link_model == 'static' else "")
+ create_build_metric_task_list(
+ 'linux_x86_64_tasks',
+ link_model,
+ build_flags,
+ )
- tasks['linux_x86_64_tasks'].append(
- Task(f"build_metrics_x86_64_{toolchain}_{compiler}_{link_model}",
- create_build_metric_task_steps(build_flags, targets)))
- tasks['linux_arm64_tasks'].append(
- Task(f"build_metrics_arm64_{toolchain}_{compiler}_{link_model}",
- create_build_metric_task_steps(build_flags, targets)))
+ create_build_metric_task_list(
+ 'linux_arm64_tasks',
+ link_model,
+ build_flags,
+ )
def create_task_group(target_platform, tasks):
task_group = TaskGroup(
name=f'build_metrics_{target_platform}_task_group_gen',
tasks=tasks,
- max_hosts=len(tasks),
+ max_hosts=1,
setup_group=[
BuiltInCommand("manifest.load", {}),
FunctionCall("git get project and add git tag"),
@@ -137,29 +171,33 @@ def main():
activate=True,
)
variant.add_task_group(
- create_task_group('windows', tasks['windows_tasks']), ['windows-vsCurrent-large'])
+ create_task_group('windows', tasks['windows_tasks']['static']),
+ ['windows-vsCurrent-xlarge'])
elif sys.platform == 'darwin':
variant = BuildVariant(
name="macos-enterprise-build-metrics",
activate=True,
)
- variant.add_task_group(create_task_group('macos', tasks['macos_tasks']), ['macos-1100'])
+ for link_model, group_tasks in tasks['macos_tasks'].items():
+ variant.add_task_group(create_task_group(f'macos_{link_model}', group_tasks), ['macos-1100'])
else:
if platform.machine() == 'x86_64':
variant = BuildVariant(
name="enterprise-rhel-80-64-bit-build-metrics",
activate=True,
)
- variant.add_task_group(
- create_task_group('linux_X86_64', tasks['linux_x86_64_tasks']), ['rhel80-xlarge'])
+ for link_model, group_tasks in tasks['linux_x86_64_tasks'].items():
+ variant.add_task_group(
+ create_task_group(f'linux_X86_64_{link_model}', group_tasks), ['rhel80-xlarge'])
else:
variant = BuildVariant(
name="enterprise-rhel-80-aarch64-build-metrics",
activate=True,
)
- variant.add_task_group(
- create_task_group('linux_arm64', tasks['linux_arm64_tasks']),
- ['amazon2022-arm64-large'])
+ for link_model, group_tasks in tasks['linux_arm64_tasks'].items():
+ variant.add_task_group(
+ create_task_group(f'linux_arm64_{link_model}', group_tasks),
+ ['amazon2022-arm64-large'])
project = ShrubProject({variant})
with open('build_metrics_task_gen.json', 'w') as fout:
diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml
index 52b7c3125fa..82171f87ba4 100644
--- a/etc/evergreen_yml_components/definitions.yml
+++ b/etc/evergreen_yml_components/definitions.yml
@@ -2205,6 +2205,25 @@ functions:
params:
file_location: ${report_file|src/report.json}
+ "combine build metrics":
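+ # Run the combiner's unit tests, then merge the per-split metrics files for each prefix into a single json.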
+ - command: subprocess.exec
+ params:
+ binary: bash
+ add_expansions_to_env: true
+ args:
+ - "src/evergreen/run_python_script.sh"
+ - "site_scons/site_tools/build_metrics/combine_metrics_unittest.py"
+ - command: subprocess.exec
+ params:
+ binary: bash
+ add_expansions_to_env: true
+ args:
+ - "src/evergreen/run_python_script.sh"
+ - "site_scons/site_tools/build_metrics/combine_metrics.py"
+ - "--prefix-name=build_metrics"
+ - "--prefix-name=populate_cache"
+ - "--prefix-name=pull_cache"
+
"print top N metrics":
- command: subprocess.exec
params:
diff --git a/evergreen/build_metric_cedar_report.py b/evergreen/build_metric_cedar_report.py
index af86580ff98..6c89e733ccb 100644
--- a/evergreen/build_metric_cedar_report.py
+++ b/evergreen/build_metric_cedar_report.py
@@ -61,7 +61,14 @@ with open(clean_build_metrics_json) as f:
]
})
- cedar_report.append(single_metric_test("SCons memory usage", "MBs", build_metrics['scons_metrics']['memory']['post_build'] / 1024.0 / 1024.0))
+ try:
+ cedar_report.append(single_metric_test("SCons memory usage", "MBs", build_metrics['scons_metrics']['memory']['post_build'] / 1024.0 / 1024.0))
+ except KeyError:
+ if sys.platform == 'darwin':
+ # macOS has known memory-reporting issues. SCons does not use psutil here, but both likely rely on the
+ # same underlying OS calls to determine memory usage: https://github.com/giampaolo/psutil/issues/1908
+ pass
+
cedar_report.append(single_metric_test("System Memory Peak", "MBs", build_metrics['system_memory']['max'] / 1024.0 / 1024.0))
cedar_report.append(single_metric_test("Total Build time", "seconds", build_metrics['scons_metrics']['time']['total']))
cedar_report.append(single_metric_test("Total Build output size", "MBs", build_metrics['artifact_metrics']['total_artifact_size'] / 1024.0 / 1024.0))
diff --git a/site_scons/site_tools/build_metrics/combine_metrics.py b/site_scons/site_tools/build_metrics/combine_metrics.py
new file mode 100644
index 00000000000..6dae88f7f1a
--- /dev/null
+++ b/site_scons/site_tools/build_metrics/combine_metrics.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+import json
+import sys
+import glob
+import argparse
+import statistics
+
+from typing import Dict, List, Any
+
+parser = argparse.ArgumentParser(description='Combine metrics json files into a single file.')
+parser.add_argument(
+ '--prefix-name', metavar='FILES', action='append', default=[], help=
+ 'Prefix path to collect json files of the form "{prefix_path}*.json" for combining into a single json: "{prefix_path}.json"'
+)
+parser.add_argument('unittest_args', nargs='*')
+args = parser.parse_args()
+
+
+def set_lowest(existing: Dict, current: Dict, key: str):
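+ # Keep the smaller value: copy current[key] into existing[key] when it is lower or existing has no value yet.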
+ existing_data = existing.get(key)
+ current_data = current.get(key)
+
+ if existing_data and current_data and existing_data > current_data:
+ existing[key] = current_data
+
+ elif not existing_data and current_data:
+ existing[key] = current_data
+
+
+def set_greatest(existing: Dict, current: Dict, key: str):
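+ # Keep the larger value: copy current[key] into existing[key] when it is higher or existing has no value yet.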
+ existing_data = existing.get(key)
+ current_data = current.get(key)
+
+ if existing_data and current_data and existing_data < current_data:
+ existing[key] = current_data
+
+ elif not existing_data and current_data:
+ existing[key] = current_data
+
+
+def combine_command_line(existing: Dict, current: Dict, key: str):
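+ # Merge two space-separated command lines, appending any arguments from current that existing does not already contain.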
+ existing_data = existing.get(key)
+ current_data = current.get(key)
+
+ if not existing_data:
+ existing[key] = current_data
+ else:
+ existing_data = existing.get(key).split()
+ current_data = current.get(key).split()
+ for current_arg in current_data:
+ if current_arg not in existing_data:
+ existing_data.append(current_arg)
+
+ existing[key] = ' '.join(existing_data)
+
+
+def if_set_should_match(existing: Dict, current: Dict, key: str):
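+ # If both dicts set the key, the values must match (raise otherwise); if only current sets it, copy it into existing.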
+ existing_data = existing.get(key)
+ current_data = current.get(key)
+
+ if existing_data and current_data and existing_data != current_data:
+ raise Exception(
+ f"Expected data to match - existing: {existing_data}, current: {current_data}")
+
+ elif not existing_data and current_data:
+ existing[key] = current_data
+
+
+def recalc_list_indexes(target_list: List):
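+ # Renumber 'array_index' fields sequentially; raise if only some elements carry the key.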
+ index_found = None
+
+ for index, elem in enumerate(target_list):
+ if index_found is None and index == 0:
+ index_found = elem.get('array_index')
+
+ if (index_found is None
+ and elem.get('array_index')) or (index_found is not None
+ and elem.get('array_index') is None):
+ raise Exception("Attempted to combine list with incompat index keys.")
+
+ if elem.get('array_index') is not None:
+ elem['array_index'] = index
+
+
+def extend_list(existing: Dict, current: Dict, key: str):
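+ # Append current[key] onto existing[key] (or adopt it outright), then renumber the array indexes.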
+ existing_data = existing.get(key)
+ current_data = current.get(key)
+
+ if existing_data and current_data:
+ existing_data.extend(current_data)
+
+ elif not existing_data and current_data:
+ existing[key] = current_data
+
+ recalc_list_indexes(existing[key])
+
+
+def extend_list_no_dups(existing: Dict, current: Dict, key: str, list_unqiue_key: str):
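+ # Extend the list, then de-duplicate by the given unique key; duplicate entries must be identical (not enforced on Windows, where builds are non-deterministic).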
+ extend_list(existing, current, key)
+ unique_list = {}
+ for elem in existing[key]:
+ if elem.get('array_index') is not None:
+ elem['array_index'] = -1
+ if elem[list_unqiue_key] not in unique_list:
+ unique_list[elem[list_unqiue_key]] = elem
+ elif unique_list[elem[list_unqiue_key]] != elem:
+ if sys.platform == 'win32':
+ # The build-metrics run performs a clean and then pulls from cache; Windows does not produce the same
+ # output for the same input (it is non-deterministic), so we cannot make this guarantee or perform
+ # this check.
+ pass
+ else:
+ raise Exception(
+ f"Expected data to match - existing: {unique_list[elem[list_unqiue_key]]}, current: {elem}"
+ )
+
+ existing[key] = list(unique_list.values())
+
+ recalc_list_indexes(existing[key])
+
+
+def combine_system_memory(existing: Dict, current: Dict):
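+ # Merge the memory-over-time samples, keep the peak, recompute the mean, and keep the lowest starting memory.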
+
+ extend_list(existing, current, 'mem_over_time')
+ set_greatest(existing, current, 'max')
+ existing['arithmetic_mean'] = statistics.mean(
+ [mem['memory'] for mem in existing['mem_over_time']])
+ set_lowest(existing, current, 'start_mem')
+
+
+def combine_artifact_metrics(existing: Dict, current: Dict):
+ extend_list_no_dups(existing, current, 'artifacts', 'name')
+ existing['total_artifact_size'] = sum([artifact['size'] for artifact in existing['artifacts']])
+ existing['num_artifacts'] = len(existing['artifacts'])
+
+
+def combine_cache_metrics(existing: Dict, current: Dict):
+ extend_list_no_dups(existing, current, 'cache_artifacts', 'name')
+ existing['push_time'] += current['push_time']
+ existing['pull_time'] += current['pull_time']
+ existing['cache_size'] += sum([cache['size'] for cache in existing['cache_artifacts']])
+
+
+def combine_scons_metrics(existing: Dict, current: Dict):
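+ # Take the peak of each memory stage, sum the timing buckets, and merge per-item counts by item_name.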
+ try:
+ set_greatest(existing['memory'], current['memory'], 'pre_read')
+ set_greatest(existing['memory'], current['memory'], 'post_read')
+ set_greatest(existing['memory'], current['memory'], 'pre_build')
+ set_greatest(existing['memory'], current['memory'], 'post_build')
+ except KeyError:
+ if sys.platform == 'darwin':
+ # macOS has known memory-reporting issues. SCons does not use psutil here, but both likely rely on the
+ # same underlying OS calls to determine memory usage: https://github.com/giampaolo/psutil/issues/1908
+ pass
+
+ existing['time']['total'] += current['time']['total']
+ existing['time']['sconscript_exec'] += current['time']['sconscript_exec']
+ existing['time']['scons_exec'] += current['time']['scons_exec']
+ existing['time']['command_exec'] += current['time']['command_exec']
+
+ for new_item in current['counts']:
+ found_new_item = False
+ for existing_item in existing['counts']:
+ if existing_item['item_name'] == new_item['item_name']:
+ found_new_item = True
+ set_greatest(existing_item, new_item, 'pre_read')
+ set_greatest(existing_item, new_item, 'post_read')
+ set_greatest(existing_item, new_item, 'pre_build')
+ set_greatest(existing_item, new_item, 'post_build')
+ break
+ if not found_new_item:
+ existing['counts'].append(new_item)
+
+
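+# For each prefix, merge every matching '{prefix}*.json' file (skipping chrome-tracer output) into a single '{prefix}.json'.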
+for prefix_name in args.prefix_name:
+
+ combined_json: Dict[str, Any] = {'combined_files': []}
+
+ json_files = glob.glob(f'{prefix_name}*.json')
+ for json_file in json_files:
+ if json_file.endswith('chrome-tracer.json'):
+ continue
+
+ with open(json_file) as fjson:
+ combined_json['combined_files'].append(json_file)
+ current_json = json.load(fjson)
+
+ set_lowest(combined_json, current_json, 'start_time')
+ set_greatest(combined_json, current_json, 'end_time')
+ if_set_should_match(combined_json, current_json, 'evg_id')
+ if_set_should_match(combined_json, current_json, 'variant')
+ combine_command_line(combined_json, current_json, 'scons_command')
+
+ ###########################
+ # system_memory
+ if 'system_memory' not in combined_json:
+ combined_json['system_memory'] = current_json.get('system_memory', {})
+ else:
+ combine_system_memory(combined_json['system_memory'], current_json['system_memory'])
+
+ ############################
+ # artifact_metrics
+ if 'artifact_metrics' not in combined_json:
+ combined_json['artifact_metrics'] = current_json.get('artifact_metrics', {})
+ else:
+ combine_artifact_metrics(combined_json['artifact_metrics'],
+ current_json['artifact_metrics'])
+
+ ############################
+ # build_tasks
+ if 'build_tasks' not in combined_json:
+ combined_json['build_tasks'] = current_json.get('build_tasks', [])
+ else:
+ extend_list(combined_json, current_json, 'build_tasks')
+
+ ############################
+ # cache_metrics
+ if 'cache_metrics' not in combined_json:
+ combined_json['cache_metrics'] = current_json.get('cache_metrics', {})
+ else:
+ combine_cache_metrics(combined_json['cache_metrics'], current_json['cache_metrics'])
+
+ ############################
+ # libdeps_metrics
+ if 'libdeps_metrics' in combined_json and current_json.get('libdeps_metrics'):
+ raise Exception(f"found a second libdeps_metrics dataset in {json_file}")
+ if 'libdeps_metrics' not in combined_json and current_json.get('libdeps_metrics'):
+ combined_json['libdeps_metrics'] = current_json.get('libdeps_metrics')
+
+ ############################
+ # scons_metrics
+ if 'scons_metrics' not in combined_json:
+ combined_json['scons_metrics'] = current_json.get('scons_metrics', {})
+ else:
+ combine_scons_metrics(combined_json['scons_metrics'], current_json['scons_metrics'])
+
+ with open(f'{prefix_name}.json', 'w') as out:
+ json.dump(combined_json, out, indent=4, sort_keys=True)
diff --git a/site_scons/site_tools/build_metrics/combine_metrics_unittest.py b/site_scons/site_tools/build_metrics/combine_metrics_unittest.py
new file mode 100644
index 00000000000..3fab38a2127
--- /dev/null
+++ b/site_scons/site_tools/build_metrics/combine_metrics_unittest.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+import sys
+import os
+import unittest
+
+sys.path.append(os.path.dirname(__file__))
+
+from combine_metrics import set_lowest, set_greatest, combine_command_line, if_set_should_match, recalc_list_indexes, extend_list, extend_list_no_dups
+
+
+class CombineUnittests(unittest.TestCase):
+ def setUp(self):
+
+ self.existing = {
+ 'int': 4,
+ 'match_same': 'test',
+ 'command_line': 'arg1 arg2 dup_arg',
+ 'recalc_list': [{'array_index': 93}, {'array_index': 3}],
+ 'extend_list': [{'array_index': 0, 'key': 'text', 'val': 'data1'},
+ {'array_index': 1, 'key': 'text2', 'val': 'data2'}],
+ }
+ self.current = {
+ 'int': 5,
+ 'match_same': 'test',
+ 'command_line': 'arg3 dup_arg arg4',
+ 'extend_list': [{'array_index': 0, 'key': 'text', 'val': 'data1'},
+ {'array_index': 1, 'key': 'text3', 'val': 'data3'}],
+ }
+
+ def test_set_lowest(self):
+ set_lowest(self.existing, self.current, 'int')
+ self.assertEqual(self.existing['int'], 4)
+
+ def test_set_greatest(self):
+ set_greatest(self.existing, self.current, 'int')
+ self.assertEqual(self.existing['int'], 5)
+
+ def test_combine_command_line(self):
+ combine_command_line(self.existing, self.current, 'command_line')
+ self.assertEqual(self.existing['command_line'], 'arg1 arg2 dup_arg arg3 arg4')
+
+ def test_if_set_should_match(self):
+ if_set_should_match(self.existing, self.current, 'match_same')
+ del self.current['match_same']
+ if_set_should_match(self.existing, self.current, 'match_same')
+ self.assertEqual(self.existing['match_same'], 'test')
+ self.current['match_same'] = 'test2'
+ self.assertRaises(Exception, if_set_should_match, self.existing, self.current, 'match_same')
+
+ def test_recalc_list_indexes(self):
+ recalc_list_indexes(self.existing['recalc_list'])
+ self.assertEqual(self.existing['recalc_list'], [{'array_index': 0}, {'array_index': 1}])
+
+ def test_extend_list(self):
+ extend_list(self.existing, self.current, 'extend_list')
+ self.assertEqual(self.existing['extend_list'],
+ [{'array_index': 0, 'key': 'text', 'val': 'data1'},
+ {'array_index': 1, 'key': 'text2', 'val': 'data2'},
+ {'array_index': 2, 'key': 'text', 'val': 'data1'},
+ {'array_index': 3, 'key': 'text3', 'val': 'data3'}])
+
+ def test_extend_list_no_dups(self):
+ extend_list_no_dups(self.existing, self.current, 'extend_list', 'key')
+ self.assertEqual(self.existing['extend_list'],
+ [{'array_index': 0, 'key': 'text', 'val': 'data1'},
+ {'array_index': 1, 'key': 'text2', 'val': 'data2'},
+ {'array_index': 2, 'key': 'text3', 'val': 'data3'}])
+
+ def test_extend_list_no_dups_bad_data(self):
+ if sys.platform != 'win32':
+ self.current['extend_list'][0]['val'] = 'bad_data'
+ self.assertRaises(Exception, extend_list_no_dups, self.existing, self.current,
+ 'extend_list', 'key')
+
+
+if __name__ == '__main__':
+ unittest.main()