diff options
author | Etienne Petrel <etienne.petrel@mongodb.com> | 2021-11-18 14:43:40 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-18 15:39:40 +0000 |
commit | 529676f7505877a1c6a39f3ce262a7094f9f8c1a (patch) | |
tree | 03c77a73a8841fd971c512e217153ec4835b513d /src/third_party | |
parent | ab78b9370bf2884030cbca35f544d609f86542f2 (diff) | |
download | mongo-529676f7505877a1c6a39f3ce262a7094f9f8c1a.tar.gz |
Import wiredtiger: edf2f353acfa275030d51482633436f2f4fa3174 from branch mongodb-master
ref: 1f8d853a00..edf2f353ac
for: 5.2.0
WT-5014 Migrate Jenkins WiredTiger perf long job to Evergreen
Diffstat (limited to 'src/third_party')
5 files changed, 157 insertions, 65 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py index 53e5f5af034..a75de30d154 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py @@ -30,6 +30,7 @@ import glob import json import re +from typing import List class PerfStat: @@ -118,18 +119,19 @@ class PerfStatCount(PerfStat): class PerfStatLatency(PerfStat): - def __init__(self, short_label: str, stat_file: str, output_label: str, num_max: int): + def __init__(self, short_label: str, stat_file: str, output_label: str, ops: List[str], num_max: int): super().__init__(short_label=short_label, stat_file=stat_file, output_label=output_label) self.num_max = num_max + self.ops = ops def find_stat(self, test_stat_path: str): values = [] for line in open(test_stat_path): as_dict = json.loads(line) - values.append(as_dict["wtperf"]["read"]["max latency"]) - values.append(as_dict["wtperf"]["update"]["max latency"]) + for operation in self.ops: + values.append(as_dict["wtperf"][operation]["max latency"]) return values def get_value(self, nth_max: int): diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py index 6683f92d18c..867cbdecf10 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py @@ -43,11 +43,8 @@ from typing import List from wtperf_config import WTPerfConfig -def create_test_home_path(home: str, test_run: int, operations: List[str] = None): - home_path = "{}_{}".format(home, test_run) - if operations: - # Use the first operation name as part of the home path - home_path += "_{}".format(operations[0]) +def create_test_home_path(home: str, test_run: int, index:int): + home_path = "{}_{}_{}".format(home, test_run, index) 
return home_path @@ -124,15 +121,17 @@ def detailed_perf_stats(config: WTPerfConfig, perf_stats: PerfStatCollection): return as_dict -def run_test_wrapper(config: WTPerfConfig, operations: List[str] = None, arguments: List[str] = None): +def run_test_wrapper(config: WTPerfConfig, index: int = 0, arguments: List[str] = None): for test_run in range(config.run_max): print("Starting test {}".format(test_run)) - run_test(config=config, test_run=test_run, operations=operations, arguments=arguments) + run_test(config=config, test_run=test_run, index=index, arguments=arguments) print("Completed test {}".format(test_run)) -def run_test(config: WTPerfConfig, test_run: int, operations: List[str] = None, arguments: List[str] = None): - test_home = create_test_home_path(home=config.home_dir, test_run=test_run, operations=operations) +def run_test(config: WTPerfConfig, test_run: int, index: int = 0, arguments: List[str] = None): + test_home = create_test_home_path(home=config.home_dir, test_run=test_run, index=index) + if config.verbose: + print("Home directory path created: {}".format(test_home)) command_line = construct_wtperf_command_line( wtperf=config.wtperf_path, env=config.environment, @@ -147,9 +146,9 @@ def run_test(config: WTPerfConfig, test_run: int, operations: List[str] = None, exit(1) -def process_results(config: WTPerfConfig, perf_stats: PerfStatCollection, operations: List[str] = None): +def process_results(config: WTPerfConfig, perf_stats: PerfStatCollection, operations: List[str] = None, index: int = 0): for test_run in range(config.run_max): - test_home = create_test_home_path(home=config.home_dir, test_run=test_run, operations=operations) + test_home = create_test_home_path(home=config.home_dir, test_run=test_run, index=index) if config.verbose: print('Reading stats from {} directory.'.format(test_home)) perf_stats.find_stats(test_home=test_home, operations=operations) @@ -183,6 +182,10 @@ def setup_perf_stats(): pattern=r'Executed \d+ update operations', 
input_offset=1, output_label='Update count')) + perf_stats.add_stat(PerfStat(short_label="checkpoint", + pattern=r'Executed \d+ checkpoint operations', + input_offset=1, + output_label='Checkpoint count')) perf_stats.add_stat(PerfStatMax(short_label="max_update_throughput", pattern=r'updates,', input_offset=8, @@ -193,15 +196,34 @@ def setup_perf_stats(): output_label='Min update throughput')) perf_stats.add_stat(PerfStatCount(short_label="warnings", pattern='WARN', - output_label='Warnings')) - perf_stats.add_stat(PerfStatLatency(short_label="max_latencies", + output_label='Latency warnings')) + perf_stats.add_stat(PerfStatLatency(short_label="top5_latencies_read_update", stat_file='monitor.json', - output_label='Latency Max', + output_label='Latency(read, update) Max', + ops = ['read', 'update'], num_max = 5)) perf_stats.add_stat(PerfStatCount(short_label="eviction_page_seen", stat_file='WiredTigerStat*', pattern='[0-9].wt cache: pages seen by eviction', output_label='Pages seen by eviction')) + perf_stats.add_stat(PerfStatLatency(short_label="max_latency_insert", + stat_file='monitor.json', + output_label='Latency(insert) Max', + ops = ['insert'], + num_max = 1)) + perf_stats.add_stat(PerfStatLatency(short_label="max_latency_read_update", + stat_file='monitor.json', + output_label='Latency(read, update) Max', + ops = ['read', 'update'], + num_max = 1)) + perf_stats.add_stat(PerfStatMax(short_label="max_read_throughput", + pattern=r'updates,', + input_offset=4, + output_label='Max read throughput')) + perf_stats.add_stat(PerfStatMin(short_label="min_read_throughput", + pattern=r'updates,', + input_offset=4, + output_label='Min read throughput')) return perf_stats @@ -285,9 +307,9 @@ def main(): for content in batch_file_contents: if args.verbose: print("Argument: {}, Operation: {}".format(content["arguments"], content["operations"])) - run_test_wrapper(config=config, operations=content["operations"], arguments=content["arguments"]) + 
run_test_wrapper(config=config, index=batch_file_contents.index(content), arguments=content["arguments"]) else: - run_test_wrapper(config=config, arguments=arguments, operations=operations) + run_test_wrapper(config=config, arguments=arguments) if not args.verbose and not args.outfile: sys.exit("Enable verbosity (or provide a file path) to dump the stats. " @@ -296,7 +318,7 @@ def main(): # Process result if config.batch_file: for content in batch_file_contents: - process_results(config, perf_stats, operations=content["operations"]) + process_results(config, perf_stats, operations=content["operations"], index=batch_file_contents.index(content)) else: process_results(config, perf_stats, operations=operations) diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 10cba2d3163..ba3860efc93 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-master", - "commit": "1f8d853a00b06955ee53a8e4c24df243c8d5e2f8" + "commit": "edf2f353acfa275030d51482633436f2f4fa3174" } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index 3d1cf888fe0..f895e97d605 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -292,7 +292,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_REF *ref; WT_TIME_AGGREGATE ta; size_t size; - bool hazard, key_onpage_ovfl; + bool hazard; const void *p; btree = S2BT(session); @@ -336,13 +336,18 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * instantiated, off-page key, we don't bother setting them if that's not possible. 
*/ cell = NULL; - key_onpage_ovfl = false; ikey = __wt_ref_key_instantiated(ref); if (ikey != NULL && ikey->cell_offset != 0) { cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset); __wt_cell_unpack_addr(session, page->dsk, cell, kpack); - key_onpage_ovfl = - F_ISSET(kpack, WT_CELL_UNPACK_OVERFLOW) && kpack->raw != WT_CELL_KEY_OVFL_RM; + + /* + * Historically, we stored overflow cookies on internal pages, discard any underlying + * blocks. We have a copy to build the key (the key was instantiated when we read the + * page into memory), they won't be needed in the future as we're rewriting the page. + */ + if (F_ISSET(kpack, WT_CELL_UNPACK_OVERFLOW) && kpack->raw != WT_CELL_KEY_OVFL_RM) + WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell)); } WT_ERR(__wt_rec_child_modify(session, r, ref, &hazard, &state)); @@ -353,14 +358,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) case WT_CHILD_IGNORE: /* * Ignored child. - * - * Overflow keys referencing pages we're not writing are no longer useful, schedule them - * for discard. Don't worry about instantiation, internal page keys are always - * instantiated. Don't worry about reuse, reusing this key in this reconciliation is - * unlikely. */ - if (key_onpage_ovfl) - WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell)); WT_CHILD_RELEASE_ERR(session, hazard, ref); continue; @@ -370,26 +368,9 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) */ switch (child->modify->rec_result) { case WT_PM_REC_EMPTY: - /* - * Overflow keys referencing empty pages are no longer useful, schedule them for - * discard. Don't worry about instantiation, internal page keys are always - * instantiated. Don't worry about reuse, reusing this key in this reconciliation is - * unlikely. 
- */ - if (key_onpage_ovfl) - WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell)); WT_CHILD_RELEASE_ERR(session, hazard, ref); continue; case WT_PM_REC_MULTIBLOCK: - /* - * Overflow keys referencing split pages are no longer useful (the split page's key - * is the interesting key); schedule them for discard. Don't worry about - * instantiation, internal page keys are always instantiated. Don't worry about - * reuse, reusing this key in this reconciliation is unlikely. - */ - if (key_onpage_ovfl) - WT_ERR(__wt_ovfl_discard_add(session, page, kpack->cell)); - WT_ERR(__rec_row_merge(session, r, child)); WT_CHILD_RELEASE_ERR(session, hazard, ref); continue; diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index dcb0ea09c54..689bad441a3 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -600,13 +600,16 @@ functions: script: | set -o errexit set -o verbose + if [ ${no_create|false} = false ]; then + rm -rf WT_TEST* + fi ${virtualenv_binary} -p ${python_binary} venv source venv/bin/activate ${pip3_binary} install psutil pygit2 JSON_TASK_INFO='{ "evergreen_task_info": { "is_patch": "'${is_patch}'", "task_id": "'${task_id}'", "distro_id": "'${distro_id}'" } }' echo "JSON_TASK_INFO: $JSON_TASK_INFO" - ${test_env_vars|} ${python_binary} ../../../bench/wtperf/wtperf_run_py/wtperf_run.py -p ./wtperf -t ../../../bench/wtperf/runners/${perf-test-name}.wtperf -ho WT_TEST -m ${maxruns} -g "../.." -v -i "$JSON_TASK_INFO" -b -o test_stats/evergreen_out.json ${wtarg} - ${test_env_vars|} ${python_binary} ../../../bench/wtperf/wtperf_run_py/wtperf_run.py -p ./wtperf -t ../../../bench/wtperf/runners/${perf-test-name}.wtperf -ho WT_TEST -m ${maxruns} -g "../.." 
-v -i "$JSON_TASK_INFO" -re -o test_stats/atlas_out.json ${wtarg} + ${test_env_vars|} ${python_binary} ../../../bench/wtperf/wtperf_run_py/wtperf_run.py -p ./wtperf -t ../../../bench/wtperf/runners/${perf-test-name}.wtperf -ho WT_TEST -m ${maxruns} -g "../.." -v -i "$JSON_TASK_INFO" -b -o test_stats/evergreen_out_${perf-test-name}.json ${wtarg} + ${test_env_vars|} ${python_binary} ../../../bench/wtperf/wtperf_run_py/wtperf_run.py -p ./wtperf -t ../../../bench/wtperf/runners/${perf-test-name}.wtperf -ho WT_TEST -m ${maxruns} -g "../.." -v -i "$JSON_TASK_INFO" -re -o test_stats/atlas_out_${perf-test-name}.json ${wtarg} "upload-perf-test-stats": - command: shell.exec @@ -619,22 +622,24 @@ functions: ${virtualenv_binary} -p ${python_binary} venv source venv/bin/activate ${pip3_binary} install pymongo[srv] - git clone git@github.com:wiredtiger/automation-scripts.git - ${python_binary} automation-scripts/evergreen/upload_stats_atlas.py -u ${atlas_wt_perf_test_user} -p ${atlas_wt_perf_pass} -f test_stats/atlas_out.json -b ${branch_name} + if [[ ! 
-d "automation-scripts" ]]; then + git clone git@github.com:wiredtiger/automation-scripts.git + fi + ${python_binary} automation-scripts/evergreen/upload_stats_atlas.py -u ${atlas_wt_perf_test_user} -p ${atlas_wt_perf_pass} -f test_stats/atlas_out_${perf-test-name}.json -b ${branch_name} - command: perf.send params: - file: ./wiredtiger/cmake_build/bench/wtperf/test_stats/evergreen_out.json + file: ./wiredtiger/cmake_build/bench/wtperf/test_stats/evergreen_out_${perf-test-name}.json # Push the json results to the 'Files' tab of the task in Evergreen # Parameterised using the 'perf-test-name' variable - command: s3.put params: aws_secret: ${aws_secret} aws_key: ${aws_key} - local_files_include_filter: wiredtiger/cmake_build/bench/wtperf/test_stats/* + local_files_include_filter: wiredtiger/cmake_build/bench/wtperf/test_stats/*_${perf-test-name}.json bucket: build_external permissions: public-read content_type: text/html - remote_file: wiredtiger/${build_variant}/${revision}/perf-test-${perf-test-name}-${build_id}-${execution}/ + remote_file: wiredtiger/${build_variant}/${revision}/${task_name}-${build_id}-${execution}/ "validate-expected-stats": - command: shell.exec @@ -3380,15 +3385,15 @@ tasks: vars: perf-test-name: multi-btree-zipfian-populate maxruns: 1 - wtarg: -ops ['"read"'] - func: "run-perf-test" vars: perf-test-name: multi-btree-zipfian-workload maxruns: 1 + no_create: true wtarg: -ops ['"read"'] - func: "upload-perf-test-stats" vars: - perf-test-name: multi-btree-zipfian + perf-test-name: multi-btree-zipfian-workload - name: perf-test-many-table-stress tags: ["stress-perf"] @@ -3426,7 +3431,7 @@ tasks: - func: "validate-expected-stats" vars: expected-stats: '{"Pages seen by eviction": 200}' - stat_file: './test_stats/evergreen_out.json' + stat_file: './test_stats/evergreen_out_evict-fairness.json' - name: perf-test-evict-btree-stress-multi tags: ["stress-perf"] @@ -3438,7 +3443,7 @@ tasks: vars: perf-test-name: evict-btree-stress-multi maxruns: 1 - 
wtarg: -ops ['"warnings", "max_latencies"'] + wtarg: -ops ['"warnings", "top5_latencies_read_update"'] - func: "upload-perf-test-stats" vars: perf-test-name: evict-btree-stress-multi @@ -3539,7 +3544,7 @@ tasks: wtarg: -args ['"-C log=(enabled,file_max=1M)"'] -ops ['"update"'] - func: "upload-perf-test-stats" vars: - perf-test-name: log-small-files + perf-test-name: log - name: perf-test-log-no-checkpoints tags: ["log-perf"] @@ -3554,7 +3559,7 @@ tasks: wtarg: -args ['"-C checkpoint=(wait=0)"'] -ops ['"update"'] - func: "upload-perf-test-stats" vars: - perf-test-name: log-no-checkpoints + perf-test-name: log - name: perf-test-log-no-prealloc tags: ["log-perf"] @@ -3569,7 +3574,7 @@ tasks: wtarg: -args ['"-C log=(enabled,file_max=1M,prealloc=false)"'] -ops ['"update"'] - func: "upload-perf-test-stats" vars: - perf-test-name: log-no-prealloc + perf-test-name: log - name: perf-test-log-zero-fill tags: ["log-perf"] @@ -3584,7 +3589,7 @@ tasks: wtarg: -args ['"-C log=(enabled,file_max=1M,zero_fill=true)"'] -ops ['"update"'] - func: "upload-perf-test-stats" vars: - perf-test-name: log-zero-fill + perf-test-name: log - name: perf-test-log-many-threads tags: ["log-perf"] @@ -3599,7 +3604,85 @@ tasks: wtarg: -args ['"-C log=(enabled,file_max=1M),session_max=256", "-o threads=((count=128,updates=1))"'] -ops ['"update"'] - func: "upload-perf-test-stats" vars: - perf-test-name: log-many-threads + perf-test-name: log + + ########################################### + # Performance Long Tests # + ########################################### + + - name: perf-test-long-btree + tags: ["long-perf"] + depends_on: + - name: compile + commands: + - command: timeout.update + params: + exec_timeout_secs: 86400 + timeout_secs: 86400 + - func: "fetch artifacts" + - func: "run-perf-test" + vars: + perf-test-name: 500m-btree-populate + maxruns: 1 + wtarg: -args ['"-C create,statistics=(fast),statistics_log=(json,wait=1,sources=[file:])"'] -ops ['"load", "warnings", "max_latency_insert"'] + - 
func: "upload-perf-test-stats" + vars: + perf-test-name: 500m-btree-populate + - func: "run-perf-test" + vars: + perf-test-name: 500m-btree-50r50u + maxruns: 1 + no_create: true + wtarg: -args ['"-C create,statistics=(fast),statistics_log=(json,wait=1,sources=[file:])"'] -ops ['"read", "update", "warnings", "max_latency_read_update"'] + - func: "upload-perf-test-stats" + vars: + perf-test-name: 500m-btree-50r50u + - func: "run-perf-test" + vars: + perf-test-name: 500m-btree-50r50u-backup + maxruns: 1 + no_create: true + wtarg: -args ['"-C create,statistics=(fast),statistics_log=(json,wait=1,sources=[file:])"'] -ops ['"read", "update", "warnings", "max_latency_read_update"'] + - func: "upload-perf-test-stats" + vars: + perf-test-name: 500m-btree-50r50u-backup + - func: "run-perf-test" + vars: + perf-test-name: 500m-btree-80r20u + maxruns: 1 + no_create: true + wtarg: -args ['"-C create,statistics=(fast),statistics_log=(json,wait=1,sources=[file:])"'] -ops ['"read", "update", "warnings", "max_latency_read_update"'] + - func: "upload-perf-test-stats" + vars: + perf-test-name: 500m-btree-80r20u + - func: "run-perf-test" + vars: + perf-test-name: 500m-btree-rdonly + maxruns: 1 + no_create: true + wtarg: -args ['"-C create,statistics=(fast),statistics_log=(json,wait=1,sources=[file:])"'] -ops ['"read", "warnings", "max_latency_read_update", "min_read_throughput", "max_read_throughput"'] + - func: "upload-perf-test-stats" + vars: + perf-test-name: 500m-btree-rdonly + + - name: perf-test-long-checkpoint-stress + tags: ["long-perf"] + depends_on: + - name: compile + commands: + - command: timeout.update + params: + exec_timeout_secs: 86400 + timeout_secs: 86400 + - func: "fetch artifacts" + - func: "run-perf-test" + vars: + perf-test-name: checkpoint-stress + maxruns: 1 + wtarg: -args ['"-C create,statistics=(fast),statistics_log=(json,wait=1,sources=[file:])"'] -ops ['"update", "checkpoint"'] + - func: "upload-perf-test-stats" + vars: + perf-test-name: checkpoint-stress 
####################################### # Buildvariants # @@ -3887,6 +3970,7 @@ buildvariants: - name: ".checkpoint-perf" - name: ".evict-perf" - name: ".log-perf" + - name: ".long-perf" display_tasks: - name: Wiredtiger-perf-btree-jobs execution_tasks: @@ -3906,6 +3990,9 @@ buildvariants: - name: Wiredtiger-perf-log-jobs execution_tasks: - ".log-perf" + - name: Wiredtiger-perf-long-jobs + execution_tasks: + - ".long-perf" - name: large-scale-tests display_name: "Large scale tests" |