author    Chenhao Qu <chenhao.qu@mongodb.com>  2021-10-06 06:50:01 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-10-06 07:30:19 +0000
commit    437af4d512b729d8a2a6e765eade41bcf5be6f95 (patch)
tree      b02f1009a129593136b25ccb5ff7880a7f170f9c
parent    e6fa82d1ed1e6d4d8d25b8354273264ca5830e75 (diff)
download  mongo-437af4d512b729d8a2a6e765eade41bcf5be6f95.tar.gz
Import wiredtiger: 0c90aafd226dbf337f294bd4b3beb26060c1109e from branch mongodb-master
ref: 40af161e26..0c90aafd22 for: 5.2.0 WT-5270 create wtperf script for evergreen
-rw-r--r--  src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py             45
-rw-r--r--  src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py  37
-rw-r--r--  src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py         23
-rw-r--r--  src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py            167
-rw-r--r--  src/third_party/wiredtiger/import.data                                          2
-rwxr-xr-x  src/third_party/wiredtiger/test/evergreen.yml                                  161
6 files changed, 434 insertions(+), 1 deletion(-)
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py
new file mode 100644
index 00000000000..de62d328373
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat.py
@@ -0,0 +1,45 @@
+class PerfStat:
+ def __init__(self,
+ short_label: str,
+ pattern: str,
+ input_offset: int,
+ output_label: str,
+ output_precision: int = 0,
+ conversion_function=int):
+ self.short_label: str = short_label
+ self.pattern: str = pattern
+ self.input_offset: int = input_offset
+ self.output_label: str = output_label
+ self.output_precision: int = output_precision
+ self.conversion_function = conversion_function
+ self.values = []
+
+ def add_value(self, value):
+ converted_value = self.conversion_function(value)
+ self.values.append(converted_value)
+
+ def get_num_values(self):
+ return len(self.values)
+
+ def get_average(self):
+ num_values = len(self.values)
+ total = sum(self.values)
+ average = self.conversion_function(total / num_values)
+ return average
+
+ def get_skipminmax_average(self):
+ num_values = len(self.values)
+ assert num_values >= 3
+ minimum = min(self.values)
+ maximum = max(self.values)
+ total = sum(self.values)
+ total_skipminmax = total - maximum - minimum
+ num_values_skipminmax = num_values - 2
+ skipminmax_average = self.conversion_function(total_skipminmax / num_values_skipminmax)
+ return skipminmax_average
+
+ def get_core_average(self):
+ if len(self.values) >= 3:
+ return self.get_skipminmax_average()
+ else:
+ return self.get_average()
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py
new file mode 100644
index 00000000000..d2784901311
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/perf_stat_collection.py
@@ -0,0 +1,37 @@
+import re
+from perf_stat import PerfStat
+
+def find_stat(test_stat_path: str, pattern: str, position_of_value: int):
+ for line in open(test_stat_path):
+ match = re.match(pattern, line)
+ if match:
+ return float(line.split()[position_of_value])
+ return 0
+
+class PerfStatCollection:
+ def __init__(self):
+ self.perf_stats = {}
+
+ def add_stat(self, perf_stat: PerfStat):
+ self.perf_stats[perf_stat.short_label] = perf_stat
+
+ def find_stats(self, test_stat_path: str):
+ for stat in self.perf_stats.values():
+ value = find_stat(test_stat_path=test_stat_path,
+ pattern=stat.pattern,
+ position_of_value=stat.input_offset)
+ stat.add_value(value=value)
+
+ def to_value_list(self):
+ as_list = []
+ for stat in self.perf_stats.values():
+ as_list.append({
+ 'name': stat.output_label,
+ 'value': stat.get_core_average(),
+ 'values': stat.values
+ })
+ return as_list
+
+ def to_dict(self):
+ as_dict = {'metrics': self.to_value_list()}
+ return as_dict
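find_stat() scans the test.stat file line by line, applies re.match() to each line, and on the first hit returns the whitespace-separated token at the requested offset (0 if nothing matches). A small sketch of that extraction, using a hypothetical wtperf output line; the pattern mirrors the style registered in setup_perf_stats() further down in this patch:

# Illustrative sketch, not part of the commit.
import re

sample_line = "Executed 117000 read operations"   # hypothetical stat line, for illustration only
pattern = r'Executed \d+ read operations'

if re.match(pattern, sample_line):
    # input_offset=1 selects the second whitespace-separated token: the operation count.
    value = float(sample_line.split()[1])
    print(value)  # 117000.0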
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py
new file mode 100644
index 00000000000..d2b70dff2dc
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_config.py
@@ -0,0 +1,23 @@
+class WTPerfConfig:
+ def __init__(self,
+ wtperf_path: str,
+ home_dir: str,
+ test: str,
+ environment: str = None,
+ run_max: int = 1,
+ verbose: bool = False):
+ self.wtperf_path: str = wtperf_path
+ self.home_dir: str = home_dir
+ self.test: str = test
+ self.environment: str = environment
+ self.run_max: int = run_max
+ self.verbose: bool = verbose
+
+ def to_value_dict(self):
+ as_dict = {'wt_perf_path': self.wtperf_path,
+ 'test': self.test,
+ 'home_dir': self.home_dir,
+ 'environment': self.environment,
+ 'run_max': self.run_max,
+ 'verbose': self.verbose}
+ return as_dict
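WTPerfConfig is a plain parameter holder; to_value_dict() produces the mapping that ends up under the 'config' key of the final report. A minimal usage sketch, assuming wtperf_config.py (added above) is importable; the paths are placeholders:

# Illustrative sketch, not part of the commit.
import json
from wtperf_config import WTPerfConfig

config = WTPerfConfig(wtperf_path='/path/to/wtperf',
                      home_dir='WT_TEST',
                      test='../runners/small-lsm.wtperf',
                      run_max=3,
                      verbose=True)
print(json.dumps(config.to_value_dict(), indent=4, sort_keys=True))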
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py
new file mode 100644
index 00000000000..fcd9e8d0cb8
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_run_py/wtperf_run.py
@@ -0,0 +1,167 @@
+import argparse
+import json
+import os.path
+import re
+import subprocess
+import sys
+import platform
+import psutil
+
+from wtperf_config import WTPerfConfig
+from perf_stat import PerfStat
+from perf_stat_collection import PerfStatCollection
+
+# example parameters: -p /Users/jeremy.thorp/Git/wiredtiger/build/bench/wtperf/wtperf -t ../runners/small-lsm.wtperf -v -ho WT_TEST -m 3
+
+# the 'test.stat' file is where wt-perf.c writes out its statistics
+# (within the directory specified by the 'home' parameter)
+test_stats_file = 'test.stat'
+
+def create_test_home_path(home: str, test_run: int):
+ return '{}_{}'.format(home, test_run)
+
+def create_test_stat_path(test_home_path: str):
+ return os.path.join(test_home_path, test_stats_file)
+
+def find_stat(test_stat_path: str, pattern: str, position_of_value: int):
+ for line in open(test_stat_path):
+ match = re.match(pattern, line)
+ if match:
+ return line.split()[position_of_value]
+ return 0
+
+def construct_wtperf_command_line(wtperf: str, env: str, test: str, home: str):
+ command_line = []
+ if env is not None:
+ command_line.append(env)
+ command_line.append(wtperf)
+ if test is not None:
+ command_line.append('-O')
+ command_line.append(test)
+ if home is not None:
+ command_line.append('-h')
+ command_line.append(home)
+ return command_line
+
+def run_test(config: WTPerfConfig, test_run: int):
+ test_home = create_test_home_path(home=config.home_dir, test_run=test_run)
+ command_line = construct_wtperf_command_line(
+ wtperf=config.wtperf_path,
+ env=config.environment,
+ test=config.test,
+ home=test_home)
+ # print('Command Line for test: {}'.format(command_line))
+ subprocess.run(command_line)
+
+def process_results(config: WTPerfConfig, perf_stats: PerfStatCollection):
+ for test_run in range(config.run_max):
+ test_home = create_test_home_path(home=config.home_dir, test_run=test_run)
+ test_stats_path = create_test_stat_path(test_home)
+ if config.verbose:
+ print('Reading test stats file: {}'.format(test_stats_path))
+ perf_stats.find_stats(test_stat_path=test_stats_path)
+
+ total_memory_gb = psutil.virtual_memory().total / (1024 * 1024 * 1024)
+ as_dict = {'config': config.to_value_dict(),
+ 'metrics': perf_stats.to_value_list(),
+ 'system': {
+ 'cpu_physical_cores': psutil.cpu_count(logical=False),
+ 'cpu_logical_cores': psutil.cpu_count(),
+ 'total_physical_memory_gb': total_memory_gb,
+ 'platform': platform.platform()}
+ }
+ return as_dict
+
+def setup_perf_stats():
+ perf_stats = PerfStatCollection()
+ perf_stats.add_stat(PerfStat(short_label="load",
+ pattern='Load time:',
+ input_offset=2,
+ output_label='Load time:',
+ output_precision=2,
+ conversion_function=float))
+ perf_stats.add_stat(PerfStat(short_label="insert",
+ pattern='Executed \d+ insert operations',
+ input_offset=1,
+ output_label='Insert count:'))
+ perf_stats.add_stat(PerfStat(short_label="modify",
+ pattern='Executed \d+ modify operations',
+ input_offset=1,
+ output_label='Modify count:'))
+ perf_stats.add_stat(PerfStat(short_label="read",
+ pattern='Executed \d+ read operations',
+ input_offset=1,
+ output_label='Read count:'))
+ perf_stats.add_stat(PerfStat(short_label="truncate",
+ pattern='Executed \d+ truncate operations',
+ input_offset=1,
+ output_label='Truncate count:'))
+ perf_stats.add_stat(PerfStat(short_label="update",
+ pattern='Executed \d+ update operations',
+ input_offset=1,
+ output_label='Update count:'))
+ return perf_stats
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p', '--wtperf', help='path of the wtperf executable')
+ parser.add_argument('-e', '--env', help='any environment variables that need to be set for running wtperf')
+ parser.add_argument('-t', '--test', help='path of the wtperf test to execute')
+ parser.add_argument('-o', '--outfile', help='path of the file to write test output to')
+ parser.add_argument('-m', '--runmax', type=int, default=1, help='maximum number of times to run the test')
+ parser.add_argument('-ho', '--home', help='path of the "home" directory that wtperf will use')
+ parser.add_argument('-re',
+ '--reuse',
+ action="store_true",
+ help='reuse and reanalyse results from previous tests rather than running tests again')
+ parser.add_argument('-v', '--verbose', action="store_true", help='be verbose')
+ args = parser.parse_args()
+
+ if args.verbose:
+ print('WTPerfPy')
+ print('========')
+ print("Configuration:")
+ print(" WtPerf path: {}".format(args.wtperf))
+ print(" Environment: {}".format(args.env))
+ print(" Test path: {}".format(args.test))
+ print(" Home base: {}".format(args.home))
+ print(" Outfile: {}".format(args.outfile))
+ print(" Runmax: {}".format(args.runmax))
+ print(" Reuse results: {}".format(args.reuse))
+
+ if args.wtperf is None:
+ sys.exit('The path to the wtperf executable is required')
+ if args.test is None:
+ sys.exit('The path to the test file is required')
+ if args.home is None:
+ sys.exit('The path to the "home" directory is required')
+
+ config = WTPerfConfig(wtperf_path=args.wtperf,
+ home_dir=args.home,
+ test=args.test,
+ environment=args.env,
+ run_max=args.runmax,
+ verbose=args.verbose)
+
+ perf_stats: PerfStatCollection = setup_perf_stats()
+
+ # Run tests (if we're not reusing results)
+ if not args.reuse:
+ for test_run in range(args.runmax):
+ print("Starting test {}".format(test_run))
+ run_test(config=config, test_run=test_run)
+ print("Completed test {}".format(test_run))
+
+ # Process results
+ perf_dict = process_results(config, perf_stats)
+ perf_json = json.dumps(perf_dict, indent=4, sort_keys=True)
+
+ if args.verbose:
+ print("JSON: {}".format(perf_json))
+
+ if args.outfile:
+ with open(args.outfile, 'w') as outfile:
+ json.dump(perf_dict, outfile, indent=4, sort_keys=True)
+
+if __name__ == '__main__':
+ main()
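Reading the script above, process_results() merges the config, the averaged metrics from each per-run home directory (home_0, home_1, ...), and some host information into one dictionary, which main() dumps as JSON to the -o outfile. A hedged sketch of the rough shape of that report; every value below is invented, only the keys follow the code:

# Illustrative sketch, not part of the commit.
expected_report = {
    'config': {
        'wt_perf_path': '/path/to/wtperf',
        'test': '../runners/small-lsm.wtperf',
        'home_dir': 'WT_TEST',  # per-run directories become WT_TEST_0, WT_TEST_1, ...
        'environment': None,
        'run_max': 3,
        'verbose': True,
    },
    'metrics': [
        {'name': 'Load time:', 'value': 5.21, 'values': [5.3, 5.21, 5.12]},
        {'name': 'Read count:', 'value': 117000, 'values': [118500, 117000, 116200]},
    ],
    'system': {
        'cpu_physical_cores': 8,
        'cpu_logical_cores': 16,
        'total_physical_memory_gb': 32.0,
        'platform': 'Linux-5.4.0-x86_64',
    },
}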
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 5a0958197b2..821289e1741 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-master",
- "commit": "40af161e2601336298f57ce1ddb9b0edd0bc1332"
+ "commit": "0c90aafd226dbf337f294bd4b3beb26060c1109e"
}
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 1c4701191ac..a775a9aec7a 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -554,6 +554,35 @@ functions:
done
done
+ "generic-perf-test":
+ # Run a performance test
+ # Parameterised using the 'perf-test-name' and 'maxruns' variables
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/bench/wtperf/wtperf_run_py"
+ shell: bash
+ script: |
+ set -o errexit
+ set -o verbose
+ ${virtualenv_binary} -p ${python_binary} venv
+ source venv/bin/activate
+ ${pip3_binary} install psutil
+ ${python_binary} wtperf_run.py -p ../../../cmake_build/bench/wtperf/wtperf -t ../runners/${perf-test-name}.wtperf -v -ho WT_TEST -m ${maxruns} -o out.json
+
+ "generic-perf-test-push-results":
+ # Push the json results to the 'Files' tab of the task in Evergreen
+ # Parameterised using the 'perf-test-name' variable
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wiredtiger/bench/wtperf/wtperf_run_py/out.json
+ bucket: build_external
+ permissions: public-read
+ content_type: text/html
+ display_name: "Test results (JSON)"
+ remote_file: wiredtiger/${build_variant}/${revision}/perf-test-${perf-test-name}-${build_id}-${execution}/test-results.json
+
#########################################################################################
# VARIABLES
#
@@ -2873,6 +2902,111 @@ tasks:
set -o errexit
python "../metrixplusplus/metrix++.py" limit --max-limit=std.code.complexity:cyclomatic:95
+ #############################
+ # Performance Tests for lsm #
+ #############################
+
+ - name: perf-test-small-lsm
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: small-lsm
+ maxruns: 3
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: small-lsm
+
+ - name: perf-test-medium-lsm
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: medium-lsm
+ maxruns: 1
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: medium-lsm
+
+ - name: perf-test-medium-lsm-compact
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: medium-lsm-compact
+ maxruns: 1
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: medium-lsm-compact
+
+ - name: perf-test-medium-multi-lsm
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: medium-multi-lsm
+ maxruns: 1
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: medium-multi-lsm
+
+ ###############################
+ # Performance Tests for btree #
+ ###############################
+
+ - name: perf-test-small-btree
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: small-btree
+ maxruns: 1
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: small-btree
+
+ - name: perf-test-small-btree-backup
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: small-btree-backup
+ maxruns: 1
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: small-btree-backup
+
+ - name: perf-test-medium-btree
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: medium-btree
+ maxruns: 3
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: medium-btree
+
+ - name: perf-test-medium-btree-backup
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ - func: "generic-perf-test"
+ vars:
+ perf-test-name: medium-btree-backup
+ maxruns: 3
+ - func: "generic-perf-test-push-results"
+ vars:
+ perf-test-name: medium-btree-backup
+
+
buildvariants:
- name: ubuntu2004
@@ -3051,6 +3185,33 @@ buildvariants:
- name: ".stress-test-4"
- name: format-abort-recovery-stress-test
+- name: ubuntu2004-perf-tests
+ display_name: Ubuntu 20.04 Performance tests
+ run_on:
+ - ubuntu2004-test
+ expansions:
+ test_env_vars: LD_LIBRARY_PATH=$(pwd) WT_BUILDDIR=$(pwd)
+ posix_configure_flags: -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/mongodbtoolchain_v3_gcc.cmake -DCMAKE_C_FLAGS="-ggdb" -DHAVE_DIAGNOSTIC=1 -DENABLE_ZLIB=1 -DENABLE_SNAPPY=1 -DENABLE_STRICT=1 -DCMAKE_INSTALL_PREFIX=$(pwd)/LOCAL_INSTALL
+ python_binary: '/opt/mongodbtoolchain/v3/bin/python3'
+ pip3_binary: '/opt/mongodbtoolchain/v3/bin/pip3'
+ virtualenv_binary: '/opt/mongodbtoolchain/v3/bin/virtualenv'
+ smp_command: -j $(echo "`grep -c ^processor /proc/cpuinfo` * 2" | bc)
+ cmake_generator: Ninja
+ make_command: ninja
+ is_cmake_build: true
+ tasks:
+ # btree tests
+ - name: perf-test-small-btree
+ - name: perf-test-small-btree-backup
+ - name: perf-test-medium-btree
+ - name: perf-test-medium-btree-backup
+ # lsm tests
+ - name: perf-test-small-lsm
+ - name: perf-test-medium-lsm
+ - name: perf-test-medium-lsm-compact
+ - name: perf-test-medium-multi-lsm
+
+
- name: large-scale-tests
display_name: "Large scale tests"
batchtime: 480 # 3 times a day