def generate_chrome_tracer_json(metrics):
    """Build a Chrome trace-event document from the collected build metrics.

    Chrome trace groups events per "pid", so to get a clean layout that clearly
    shows concurrent processes, every task is assigned to a synthetic "job slot"
    and the slot index is emitted as the event's pid. The slots are reconstructed
    retroactively from the recorded start and end times: tasks are visited in
    start-time order and each one is appended to the slot whose last task ended
    earliest, provided that end is strictly before the new task's start. If no
    slot qualifies, a new slot is opened. This resembles how a scheduler would
    have started the processes, but it is a simplistic reconstruction and is not
    guaranteed to match what actually happened.

    Args:
        metrics: Mapping that contains a 'build_tasks' list. Every task is a
            mapping providing 'start_time', 'end_time', 'outputs', 'builder',
            'cpu_time' and 'mem_usage'. The input is not modified.

    Returns:
        A dict ``{"traceEvents": [...]}`` holding one complete ('X') event per
        task, ordered by job slot and, within a slot, by start time.

    Raises:
        KeyError: If 'build_tasks' or one of the per-task keys is missing.
    """
    import heapq

    # reverse=True followed by reversed() visits tasks in ascending start time
    # while keeping the established tie order for tasks that share a start time
    # (the later entry of the input list is placed first).
    ordered_tasks = sorted(
        metrics['build_tasks'], reverse=True, key=lambda task: task['start_time'])

    job_slots = []
    # Min-heap of (end_time of the slot's last task, slot index). The top entry is
    # the slot that became free first; ties on end_time resolve to the lowest
    # slot index, i.e. the slot that was created first.
    slot_free_at = []

    for task in reversed(ordered_tasks):
        if slot_free_at and slot_free_at[0][0] < task['start_time']:
            # The earliest-finishing slot is already idle, so it is the best fit.
            slot_index = slot_free_at[0][1]
            job_slots[slot_index].append(task)
            heapq.heapreplace(slot_free_at, (task['end_time'], slot_index))
        else:
            # Even the earliest-finishing slot is still busy (or there are no
            # slots yet), so no slot can accommodate the task: open a new one.
            slot_index = len(job_slots)
            job_slots.append([task])
            heapq.heappush(slot_free_at, (task['end_time'], slot_index))

    trace_events = []
    for pid, job_slot in enumerate(job_slots):
        for build_task in job_slot:
            trace_events.append({
                # Prefer the first output as the label; fall back to the builder
                # name for tasks that list no outputs.
                'name': build_task['outputs'][0]
                        if build_task['outputs'] else build_task['builder'],
                'cat': build_task['builder'],
                'ph': 'X',
                # NOTE(review): trace 'ts'/'dur' are expected in microseconds, so
                # the recorded times are presumably nanoseconds - confirm against
                # the collector that produces 'build_tasks'.
                'ts': build_task['start_time'] / 1000.0,
                'dur': (build_task['end_time'] - build_task['start_time']) / 1000.0,
                'pid': pid,
                'args': {
                    "cpu": build_task['cpu_time'],
                    "mem": build_task['mem_usage'],
                },
            })

    return {"traceEvents": trace_events}