From 437af4d512b729d8a2a6e765eade41bcf5be6f95 Mon Sep 17 00:00:00 2001
From: Chenhao Qu
Date: Wed, 6 Oct 2021 06:50:01 +0000
Subject: Import wiredtiger: 0c90aafd226dbf337f294bd4b3beb26060c1109e from
 branch mongodb-master

ref: 40af161e26..0c90aafd22
for: 5.2.0

WT-5270 create wtperf script for evergreen
---
 .../bench/wtperf/wtperf_run_py/perf_stat.py        |  45 ++++++
 .../wtperf/wtperf_run_py/perf_stat_collection.py   |  37 +++++
 .../bench/wtperf/wtperf_run_py/wtperf_config.py    |  23 +++
 .../bench/wtperf/wtperf_run_py/wtperf_run.py       | 167 +++++++++++++++++++++
 src/third_party/wiredtiger/import.data             |   2 +-
 src/third_party/wiredtiger/test/evergreen.yml      | 161 ++++++++++++++++++++
 6 files changed, 434 insertions(+), 1 deletion(-)
 create mode 100644 src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py
 create mode 100644 src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py
 create mode 100644 src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py
 create mode 100644 src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py

diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py
new file mode 100644
index 00000000000..de62d328373
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py
@@ -0,0 +1,45 @@
+class PerfStat:
+    def __init__(self,
+                 short_label: str,
+                 pattern: str,
+                 input_offset: int,
+                 output_label: str,
+                 output_precision: int = 0,
+                 conversion_function=int):
+        self.short_label: str = short_label
+        self.pattern: str = pattern
+        self.input_offset: int = input_offset
+        self.output_label: str = output_label
+        self.output_precision: int = output_precision
+        self.conversion_function = conversion_function
+        self.values = []
+
+    def add_value(self, value):
+        converted_value = self.conversion_function(value)
+        self.values.append(converted_value)
+
+    def get_num_values(self):
+        return len(self.values)
+
+    def get_average(self):
+        num_values = len(self.values)
+        total = sum(self.values)
+        average = self.conversion_function(total / num_values)
+        return average
+
+    def get_skipminmax_average(self):
+        num_values = len(self.values)
+        assert num_values >= 3
+        minimum = min(self.values)
+        maximum = max(self.values)
+        total = sum(self.values)
+        total_skipminmax = total - maximum - minimum
+        num_values_skipminmax = num_values - 2
+        skipminmax_average = self.conversion_function(total_skipminmax / num_values_skipminmax)
+        return skipminmax_average
+
+    def get_core_average(self):
+        if len(self.values) >= 3:
+            return self.get_skipminmax_average()
+        else:
+            return self.get_average()
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py
new file mode 100644
index 00000000000..d2784901311
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py
@@ -0,0 +1,37 @@
+import re
+from perf_stat import PerfStat
+
+def find_stat(test_stat_path: str, pattern: str, position_of_value: int):
+    for line in open(test_stat_path):
+        match = re.match(pattern, line)
+        if match:
+            return float(line.split()[position_of_value])
+    return 0
+
+class PerfStatCollection:
+    def __init__(self):
+        self.perf_stats = {}
+
+    def add_stat(self, perf_stat: PerfStat):
+        self.perf_stats[perf_stat.short_label] = perf_stat
+
+    def find_stats(self, test_stat_path: str):
+        for stat in self.perf_stats.values():
+            value = find_stat(test_stat_path=test_stat_path,
+                              pattern=stat.pattern,
+                              position_of_value=stat.input_offset)
+            stat.add_value(value=value)
+
+    def to_value_list(self):
+        as_list = []
+        for stat in self.perf_stats.values():
+            as_list.append({
+                'name': stat.output_label,
+                'value': stat.get_core_average(),
+                'values': stat.values
+            })
+        return as_list
+
+    def to_dict(self):
+        as_dict = {'metrics': self.to_value_list()}
+        return as_dict
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py
new file mode 100644
index 00000000000..d2b70dff2dc
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py
@@ -0,0 +1,23 @@
+class WTPerfConfig:
+    def __init__(self,
+                 wtperf_path: str,
+                 home_dir: str,
+                 test: str,
+                 environment: str = None,
+                 run_max: int = 1,
+                 verbose: bool = False):
+        self.wtperf_path: str = wtperf_path
+        self.home_dir: str = home_dir
+        self.test: str = test
+        self.environment: str = environment
+        self.run_max: int = run_max
+        self.verbose: bool = verbose
+
+    def to_value_dict(self):
+        as_dict = {'wt_perf_path': self.wtperf_path,
+                   'test': self.test,
+                   'home_dir': self.home_dir,
+                   'environment': self.environment,
+                   'run_max': self.run_max,
+                   'verbose': self.verbose}
+        return as_dict
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py
new file mode 100644
index 00000000000..fcd9e8d0cb8
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py
@@ -0,0 +1,167 @@
+import argparse
+import json
+import os.path
+import re
+import subprocess
+import sys
+import platform
+import psutil
+
+from wtperf_config import WTPerfConfig
+from perf_stat import PerfStat
+from perf_stat_collection import PerfStatCollection
+
+# example parameters: -p /Users/jeremy.thorp/Git/wiredtiger/build/bench/wtperf/wtperf -t ../runners/small-lsm.wtperf -v -ho WT_TEST -m 3
+
+# the 'test.stat' file is where wt-perf.c writes out its statistics
+# (within the directory specified by the 'home' parameter)
+test_stats_file = 'test.stat'
+
+def create_test_home_path(home: str, test_run: int):
+    return '{}_{}'.format(home, test_run)
+
+def create_test_stat_path(test_home_path: str):
+    return os.path.join(test_home_path, test_stats_file)
+
+def find_stat(test_stat_path: str, pattern: str, position_of_value: int):
+    for line in open(test_stat_path):
+        match = re.match(pattern, line)
+        if match:
+            return line.split()[position_of_value]
+    return 0
+
+def construct_wtperf_command_line(wtperf: str, env: str, test: str, home: str):
+    command_line = []
+    if env is not None:
+        command_line.append(env)
+    command_line.append(wtperf)
+    if test is not None:
+        command_line.append('-O')
+        command_line.append(test)
+    if home is not None:
+        command_line.append('-h')
+        command_line.append(home)
+    return command_line
+
+def run_test(config: WTPerfConfig, test_run: int):
+    test_home = create_test_home_path(home=config.home_dir, test_run=test_run)
+    command_line = construct_wtperf_command_line(
+        wtperf=config.wtperf_path,
+        env=config.environment,
+        test=config.test,
+        home=test_home)
+    # print('Command Line for test: {}'.format(command_line))
+    subprocess.run(command_line)
+
+def process_results(config: WTPerfConfig, perf_stats: PerfStatCollection):
+    for test_run in range(config.run_max):
+        test_home = create_test_home_path(home=config.home_dir, test_run=test_run)
+        test_stats_path = create_test_stat_path(test_home)
+        if config.verbose:
+            print('Reading test stats file: {}'.format(test_stats_path))
+        perf_stats.find_stats(test_stat_path=test_stats_path)
+
+    total_memory_gb = psutil.virtual_memory().total / (1024 * 1024 * 1024)
+    as_dict = {'config': config.to_value_dict(),
+               'metrics': perf_stats.to_value_list(),
+               'system': {
+                   'cpu_physical_cores': psutil.cpu_count(logical=False),
+                   'cpu_logical_cores': psutil.cpu_count(),
+                   'total_physical_memory_gb': total_memory_gb,
+                   'platform': platform.platform()}
+               }
+    return as_dict
+
+def setup_perf_stats():
+    perf_stats = PerfStatCollection()
+    perf_stats.add_stat(PerfStat(short_label="load",
+                                 pattern='Load time:',
+                                 input_offset=2,
+                                 output_label='Load time:',
+                                 output_precision=2,
+                                 conversion_function=float))
+    perf_stats.add_stat(PerfStat(short_label="insert",
+                                 pattern=r'Executed \d+ insert operations',
+                                 input_offset=1,
+                                 output_label='Insert count:'))
+    perf_stats.add_stat(PerfStat(short_label="modify",
+                                 pattern=r'Executed \d+ modify operations',
+                                 input_offset=1,
+                                 output_label='Modify count:'))
+    perf_stats.add_stat(PerfStat(short_label="read",
+                                 pattern=r'Executed \d+ read operations',
+                                 input_offset=1,
+                                 output_label='Read count:'))
+    perf_stats.add_stat(PerfStat(short_label="truncate",
+                                 pattern=r'Executed \d+ truncate operations',
+                                 input_offset=1,
+                                 output_label='Truncate count:'))
+    perf_stats.add_stat(PerfStat(short_label="update",
+                                 pattern=r'Executed \d+ update operations',
+                                 input_offset=1,
+                                 output_label='Update count:'))
+    return perf_stats
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-p', '--wtperf', help='path of the wtperf executable')
+    parser.add_argument('-e', '--env', help='any environment variables that need to be set for running wtperf')
+    parser.add_argument('-t', '--test', help='path of the wtperf test to execute')
+    parser.add_argument('-o', '--outfile', help='path of the file to write test output to')
+    parser.add_argument('-m', '--runmax', type=int, default=1, help='maximum number of times to run the test')
+    parser.add_argument('-ho', '--home', help='path of the "home" directory that wtperf will use')
+    parser.add_argument('-re',
+                        '--reuse',
+                        action="store_true",
+                        help='reuse and reanalyse results from previous tests rather than running tests again')
+    parser.add_argument('-v', '--verbose', action="store_true", help='be verbose')
+    args = parser.parse_args()
+
+    if args.verbose:
+        print('WTPerfPy')
+        print('========')
+        print("Configuration:")
+        print("  WtPerf path: {}".format(args.wtperf))
+        print("  Environment: {}".format(args.env))
+        print("  Test path: {}".format(args.test))
+        print("  Home base: {}".format(args.home))
+        print("  Outfile: {}".format(args.outfile))
+        print("  Runmax: {}".format(args.runmax))
+        print("  Reuse results: {}".format(args.reuse))
+
+    if args.wtperf is None:
+        sys.exit('The path to the wtperf executable is required')
+    if args.test is None:
+        sys.exit('The path to the test file is required')
+    if args.home is None:
+        sys.exit('The path to the "home" directory is required')
+
+    config = WTPerfConfig(wtperf_path=args.wtperf,
+                          home_dir=args.home,
+                          test=args.test,
+                          environment=args.env,
+                          run_max=args.runmax,
+                          verbose=args.verbose)
+
+    perf_stats: PerfStatCollection = setup_perf_stats()
+
+    # Run tests (if we're not reusing results)
+    if not args.reuse:
+        for test_run in range(args.runmax):
+            print("Starting test {}".format(test_run))
+            run_test(config=config, test_run=test_run)
+            print("Completed test {}".format(test_run))
+
+    # Process results
+    perf_dict = process_results(config, perf_stats)
+    perf_json = json.dumps(perf_dict, indent=4, sort_keys=True)
+
+    if args.verbose:
+        print("JSON: {}".format(perf_json))
+
+    if args.outfile:
+        with open(args.outfile, 'w') as outfile:
+            json.dump(perf_dict, outfile, indent=4, sort_keys=True)
+
+if __name__ == '__main__':
+    main()
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 5a0958197b2..821289e1741 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
   "vendor": "wiredtiger",
   "github": "wiredtiger/wiredtiger.git",
   "branch": "mongodb-master",
-  "commit": "40af161e2601336298f57ce1ddb9b0edd0bc1332"
+  "commit": "0c90aafd226dbf337f294bd4b3beb26060c1109e"
 }
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 1c4701191ac..a775a9aec7a 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -554,6 +554,35 @@ functions:
           done
         done
 
+  "generic-perf-test":
+    # Run a performance test
+    # Parameterised using the 'perf-test-name' and 'maxruns' variables
+    - command: shell.exec
+      params:
+        working_dir: "wiredtiger/bench/wtperf/wtperf_run_py"
+        shell: bash
+        script: |
+          set -o errexit
+          set -o verbose
+          ${virtualenv_binary} -p ${python_binary} venv
+          source venv/bin/activate
+          ${pip3_binary} install psutil
+          ${python_binary} wtperf_run.py -p ../../../cmake_build/bench/wtperf/wtperf -t ../runners/${perf-test-name}.wtperf -v -ho WT_TEST -m ${maxruns} -o out.json
+
+  "generic-perf-test-push-results":
+    # Push the json results to the 'Files' tab of the task in Evergreen
+    # Parameterised using the 'perf-test-name' variable
+    - command: s3.put
+      params:
+        aws_secret: ${aws_secret}
+        aws_key: ${aws_key}
+        local_file: wiredtiger/bench/wtperf/wtperf_run_py/out.json
+        bucket: build_external
+        permissions: public-read
+        content_type: text/html
+        display_name: "Test results (JSON)"
+        remote_file: wiredtiger/${build_variant}/${revision}/perf-test-${perf-test-name}-${build_id}-${execution}/test-results.json
+
 #########################################################################################
 #                                       VARIABLES                                       #
 #########################################################################################
@@ -2873,6 +2902,111 @@ tasks:
           set -o errexit
           python "../metrixplusplus/metrix++.py" limit --max-limit=std.code.complexity:cyclomatic:95
 
+  #############################
+  # Performance Tests for lsm #
+  #############################
+
+  - name: perf-test-small-lsm
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: small-lsm
+          maxruns: 3
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: small-lsm
+
+  - name: perf-test-medium-lsm
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: medium-lsm
+          maxruns: 1
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: medium-lsm
+
+  - name: perf-test-medium-lsm-compact
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: medium-lsm-compact
+          maxruns: 1
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: medium-lsm-compact
+
+  - name: perf-test-medium-multi-lsm
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: medium-multi-lsm
+          maxruns: 1
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: medium-multi-lsm
+
+  ###############################
+  # Performance Tests for btree #
+  ###############################
+
+  - name: perf-test-small-btree
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: small-btree
+          maxruns: 1
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: small-btree
+
+  - name: perf-test-small-btree-backup
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: small-btree-backup
+          maxruns: 1
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: small-btree-backup
+
+  - name: perf-test-medium-btree
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: medium-btree
+          maxruns: 3
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: medium-btree
+
+  - name: perf-test-medium-btree-backup
+    commands:
+      - func: "get project"
+      - func: "compile wiredtiger"
+      - func: "generic-perf-test"
+        vars:
+          perf-test-name: medium-btree-backup
+          maxruns: 3
+      - func: "generic-perf-test-push-results"
+        vars:
+          perf-test-name: medium-btree-backup
+
+
 
 buildvariants:
 - name: ubuntu2004
@@ -3051,6 +3185,33 @@ buildvariants:
     - name: ".stress-test-4"
     - name: format-abort-recovery-stress-test
 
+- name: ubuntu2004-perf-tests
+  display_name: Ubuntu 20.04 Performance tests
+  run_on:
+  - ubuntu2004-test
+  expansions:
+    test_env_vars: LD_LIBRARY_PATH=$(pwd) WT_BUILDDIR=$(pwd)
+    posix_configure_flags: -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/mongodbtoolchain_v3_gcc.cmake -DCMAKE_C_FLAGS="-ggdb" -DHAVE_DIAGNOSTIC=1 -DENABLE_ZLIB=1 -DENABLE_SNAPPY=1 -DENABLE_STRICT=1 -DCMAKE_INSTALL_PREFIX=$(pwd)/LOCAL_INSTALL
+    python_binary: '/opt/mongodbtoolchain/v3/bin/python3'
+    pip3_binary: '/opt/mongodbtoolchain/v3/bin/pip3'
+    virtualenv_binary: '/opt/mongodbtoolchain/v3/bin/virtualenv'
+    smp_command: -j $(echo "`grep -c ^processor /proc/cpuinfo` * 2" | bc)
+    cmake_generator: Ninja
+    make_command: ninja
+    is_cmake_build: true
+  tasks:
+    # btree tests
+    - name: perf-test-small-btree
+    - name: perf-test-small-btree-backup
+    - name: perf-test-medium-btree
+    - name: perf-test-medium-btree-backup
+    # lsm tests
+    - name: perf-test-small-lsm
+    - name: perf-test-medium-lsm
+    - name: perf-test-medium-lsm-compact
+    - name: perf-test-medium-multi-lsm
+
+
 - name: large-scale-tests
   display_name: "Large scale tests"
   batchtime: 480 # 3 times a day
-- 
cgit v1.2.1
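
Usage sketch (editor's illustration, not part of the imported patch): assuming perf_stat.py
from this change is importable, the snippet below shows how a PerfStat turns the values
collected from several wtperf runs into the single reported metric; the numbers are made up.
With three or more runs, get_core_average() drops the minimum and maximum before averaging,
which is how wtperf_run.py smooths out noisy runs; with fewer runs it falls back to a plain
average.

    from perf_stat import PerfStat

    # Track the insert count reported by each wtperf run (illustrative values only).
    stat = PerfStat(short_label="insert",
                    pattern=r'Executed \d+ insert operations',
                    input_offset=1,
                    output_label='Insert count:')
    for value in [1000, 5000, 1200]:
        stat.add_value(value)

    # 1000 (min) and 5000 (max) are discarded, leaving the middle run's value.
    print(stat.get_core_average())  # -> 1200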