From ef3ad37a33f38df9b20510c1c33510b0ceee8438 Mon Sep 17 00:00:00 2001 From: Daniel Moody Date: Tue, 7 Feb 2023 06:05:25 +0000 Subject: SERVER-73404 added task limiter tool to limit concurrency --- SConstruct | 67 +++++++++++-------------- buildscripts/setup_spawnhost_coredump | 6 +-- etc/evergreen.yml | 1 + etc/expansions.default.yml | 1 + evergreen/scons_compile.sh | 5 +- site_scons/site_tools/task_limiter.py | 92 +++++++++++++++++++++++++++++++++++ 6 files changed, 129 insertions(+), 43 deletions(-) create mode 100644 site_scons/site_tools/task_limiter.py diff --git a/SConstruct b/SConstruct index ad662ece3be..abb5949f3f5 100644 --- a/SConstruct +++ b/SConstruct @@ -1024,6 +1024,14 @@ env_vars.Add( converter=variable_shlex_converter, ) +env_vars.Add( + 'UNITTESTS_COMPILE_CONCURRENCY', + help= + 'Sets the ratio of total jobs for max concurrency when compiling unittests source files. Should be float between 0 and 1.', + default="1", + converter=lambda val: float(val) if val != '' else '', +) + env_vars.Add( 'DESTDIR', help='Where builds will install files', @@ -6287,45 +6295,28 @@ env.Alias("distsrc", "distsrc-tgz") # Do this as close to last as possible before reading SConscripts, so # that any tools that may have injected other things via emitters are included # among the side effect adornments. -# -# TODO: Move this to a tool. +env.Tool('task_limiter') if has_option('jlink'): - jlink = get_option('jlink') - if jlink <= 0: - env.FatalError("The argument to jlink must be a positive integer or float") - elif jlink < 1 and jlink > 0: - jlink = env.GetOption('num_jobs') * jlink - jlink = round(jlink) - if jlink < 1.0: - print("Computed jlink value was less than 1; Defaulting to 1") - jlink = 1.0 - - jlink = int(jlink) - target_builders = ['Program', 'SharedLibrary', 'LoadableModule'] - - # A bound map of stream (as in stream of work) name to side-effect - # file. 
Since SCons will not allow tasks with a shared side-effect - # to execute concurrently, this gives us a way to limit link jobs - # independently of overall SCons concurrency. - jlink_stream_map = dict() - - def jlink_emitter(target, source, env): - name = str(target[0]) - se_name = "#jlink-stream" + str(hash(name) % jlink) - se_node = jlink_stream_map.get(se_name, None) - if not se_node: - se_node = env.Entry(se_name) - # This may not be necessary, but why chance it - env.NoCache(se_node) - jlink_stream_map[se_name] = se_node - env.SideEffect(se_node, target) - return (target, source) - - for target_builder in target_builders: - builder = env['BUILDERS'][target_builder] - base_emitter = builder.emitter - new_emitter = SCons.Builder.ListEmitter([base_emitter, jlink_emitter]) - builder.emitter = new_emitter + + env.SetupTaskLimiter( + name='jlink', + concurrency_ratio=get_option('jlink'), + builders=['Program', 'SharedLibrary', 'LoadableModule'], + ) + +if env.get('UNITTESTS_COMPILE_CONCURRENCY'): + + if hasattr(SCons.Tool, 'cxx'): + c_suffixes = SCons.Tool.cxx.CXXSuffixes + else: + c_suffixes = SCons.Tool.msvc.CXXSuffixes + + env.SetupTaskLimiter( + name='unit_cc', + concurrency_ratio=float(env.get('UNITTESTS_COMPILE_CONCURRENCY')), + builders={'Object': c_suffixes, 'SharedObject': c_suffixes}, + source_file_regex=r"^.*_test\.cpp$", + ) # Keep this late in the game so that we can investigate attributes set by all the tools that have run. if has_option("cache"): diff --git a/buildscripts/setup_spawnhost_coredump b/buildscripts/setup_spawnhost_coredump index 0af8b52b3a6..d8f92b6a19d 100755 --- a/buildscripts/setup_spawnhost_coredump +++ b/buildscripts/setup_spawnhost_coredump @@ -251,10 +251,8 @@ EOF chmod +x ./create_build_id_links.sh cpus=$(getconf _NPROCESSORS_ONLN) - # notice we don't search lib directory as we assume dynamic builds build the index during - # the build. 
- find bin -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./add_index.sh - find bin -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./recalc_debuglink.sh + find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./add_index.sh + find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./recalc_debuglink.sh # This script constructs symblinks based off the build-id so GDB can skip the crc check # normally performed during .gnu_debuglink loading. diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 53cf96e1ee6..c934bfb6e76 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -3272,6 +3272,7 @@ buildvariants: -Wl,--no-gnu-unique" CCFLAGS="-fno-gnu-unique" clang_tidy_toolchain: v4 + num_scons_unit_cc_jobs_available: 0.2 compile_variant: *commit-queue depends_on: [] tasks: diff --git a/etc/expansions.default.yml b/etc/expansions.default.yml index abc72b2141b..dcfa0262da0 100644 --- a/etc/expansions.default.yml +++ b/etc/expansions.default.yml @@ -35,6 +35,7 @@ ext: tgz compiling_for_test: "false" install_directory: /data/mongo-install-directory num_scons_link_jobs_available: "0.25" +num_scons_unit_cc_jobs_available: "0.5" scons_cache_mode: nolinked show_scons_timings: "true" separate_debug: "on" diff --git a/evergreen/scons_compile.sh b/evergreen/scons_compile.sh index 91d1f363945..375de0e1aed 100755 --- a/evergreen/scons_compile.sh +++ b/evergreen/scons_compile.sh @@ -17,6 +17,9 @@ rm -rf ${install_directory} echo "Changing SCons to run with --jlink=${num_scons_link_jobs_available}" extra_args="$extra_args --jlink=${num_scons_link_jobs_available} --separate-debug=${separate_debug}" +echo "Changing SCons to run with UNITTESTS_COMPILE_CONCURRENCY=${num_scons_unit_cc_jobs_available}" +extra_args="$extra_args UNITTESTS_COMPILE_CONCURRENCY=${num_scons_unit_cc_jobs_available}" + if [ "${scons_cache_scope}" = "shared" ]; then extra_args="$extra_args --cache-debug=scons_cache.log" fi @@ -49,7 +52,7 @@ else 
import re

# Compiled 'source' and 'target' patterns for every configured limiter,
# keyed by limiter name. The emitters look their patterns up here at emit time.
task_limiter_patterns = {}


def setup_task_limiter(env, name, concurrency_ratio=0.75, builders=None, source_file_regex='.*',
                       target_file_regex='.*'):
    """Cap how many tasks of the selected builders may run at the same time.

    An emitter is appended to each selected builder. For every task that
    matches, the emitter attaches one of ``max_concurrency`` shared
    side-effect nodes to the targets. Since SCons will not allow tasks with a
    shared side-effect to execute concurrently, this limits the matching
    tasks independently of the overall SCons concurrency.

    Args:
        env: Environment providing ``GetOption``, ``FatalError``, ``Entry``,
            ``NoCache``, ``SideEffect`` and the ``BUILDERS`` mapping.
        name: Limiter name; used as the key in ``task_limiter_patterns`` and
            as the prefix of the side-effect node names.
        concurrency_ratio: Fraction of ``num_jobs`` the matching tasks may
            use. Anything ``float()`` accepts; must be positive, and values
            above 1 are clamped to 1.
        builders: Either an iterable of builder names (the builder's own
            emitter is wrapped), or a dict mapping builder name to a
            collection of source suffixes (only the per-suffix emitters for
            those suffixes are wrapped). ``None`` or empty installs nothing.
        source_file_regex: Pattern searched against each source node's path.
        target_file_regex: Pattern searched against each target node's path.

    A task is limited only when at least one source matches
    ``source_file_regex`` AND at least one target matches
    ``target_file_regex``. Both default to the catch-all ``'.*'``, so passing
    just one of them restricts the limiter to that pattern. (Previously the
    two checks were OR-ed, which made the unspecified catch-all side match
    every task and silently disabled the filter.)
    """
    task_limiter_patterns[name] = {
        'source': re.compile(source_file_regex),
        'target': re.compile(target_file_regex),
    }

    # Convert the ratio into an int that correlates to a specific number of
    # concurrent jobs allowed.
    concurrency_ratio = float(concurrency_ratio)
    if concurrency_ratio <= 0.0:
        # Report the offending ratio itself; max_concurrency does not exist yet.
        env.FatalError(
            f"The concurrency ratio for {name} must be a positive number, got {concurrency_ratio}")
    concurrency_ratio = min(concurrency_ratio, 1.0)

    # Always leave at least one stream so matching tasks can still run.
    max_concurrency = max(1, round(env.GetOption('num_jobs') * concurrency_ratio))

    # A bound map of stream (as in stream of work) name to side-effect node.
    concurrent_stream_map = {}

    def task_limiter_emitter(target, source, env):
        """Attach a shared side-effect stream node to tasks matching the patterns."""
        patterns = task_limiter_patterns[name]
        source_matches = any(patterns['source'].search(node.path) for node in source)
        if source_matches and any(patterns['target'].search(node.path) for node in target):
            se_name = f"#{name}-stream{hash(str(target[0])) % max_concurrency}"
            se_node = concurrent_stream_map.get(se_name)
            if not se_node:
                se_node = env.Entry(se_name)
                # This may not be necessary, but why chance it
                env.NoCache(se_node)
                concurrent_stream_map[se_name] = se_node
            env.SideEffect(se_node, target)

        return (target, source)

    if not builders:
        # Nothing to hook into (also covers the default builders=None).
        return

    # Imported where it is needed so this block stands on its own.
    import SCons.Builder

    def with_limiter(base_emitter):
        """Chain the limiter emitter after an existing emitter."""
        return SCons.Builder.ListEmitter([base_emitter, task_limiter_emitter])

    if isinstance(builders, dict):
        for builder_name, suffixes in builders.items():
            # These builders keep one emitter per source suffix on the wrapped builder.
            emitter_by_suffix = env['BUILDERS'][builder_name].builder.emitter
            for suffix in list(emitter_by_suffix):
                if suffix in suffixes:
                    emitter_by_suffix[suffix] = with_limiter(emitter_by_suffix[suffix])
    else:
        for builder_name in builders:
            builder = env['BUILDERS'][builder_name]
            builder.emitter = with_limiter(builder.emitter)


def exists(env):
    """Tool hook: this tool is always available."""
    return True


def generate(env):
    """Tool hook: expose ``setup_task_limiter`` as ``env.SetupTaskLimiter``."""
    env.AddMethod(setup_task_limiter, 'SetupTaskLimiter')