SERVER-33203 add benchmark canary tests

author: Robert Guo <robert.guo@10gen.com> 2018-02-19 15:59:34 -0500
committer: Robert Guo <robert.guo@10gen.com> 2018-03-02 15:56:33 -0500
commit: 721846a8b4b64d72338b46f91002d4a601c7dbba (patch)
tree: dc77b248f9039ad66bf0b803743d6bf2dd5c90a7 /src
parent: f3a0cf8400ad738cc15f0ed21bd00eebe972da50 (diff)
download: mongo-721846a8b4b64d72338b46f91002d4a601c7dbba.tar.gz
7 files changed, 368 insertions, 173 deletions
diff --git a/src/mongo/db/concurrency/SConscript b/src/mongo/db/concurrency/SConscript
index ba2a08dae90..b14ddc7e836 100644
--- a/src/mongo/db/concurrency/SConscript
+++ b/src/mongo/db/concurrency/SConscript
@@ -53,6 +53,16 @@ env.Library(
     ],
 )
 
+env.Benchmark(  # benchmark binary for the lock manager; its source is d_concurrency_bm.cpp
+    target='lock_manager_bm',
+    source=[
+        'd_concurrency_bm.cpp',
+    ],
+    LIBDEPS=[
+        '$BUILD_DIR/mongo/db/service_context_noop_init',
+        'lock_manager',
+    ])
+
 env.CppUnitTest(
     target='lock_manager_test',
     source=['d_concurrency_test.cpp',
diff --git a/src/mongo/db/concurrency/d_concurrency_bm.cpp b/src/mongo/db/concurrency/d_concurrency_bm.cpp
new file mode 100644
index 00000000000..341b4ff7b13
--- /dev/null
+++ b/src/mongo/db/concurrency/d_concurrency_bm.cpp
@@ -0,0 +1,185 @@
+/**
+ *    Copyright (C) 2018 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
+
+#include "mongo/platform/basic.h"
+#include <benchmark/benchmark.h>
+
+#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/concurrency/lock_manager_test_help.h"
+#include "mongo/db/storage/recovery_unit_noop.h"
+#include "mongo/stdx/mutex.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+const int kMaxPerfThreads = 16;  // max number of threads to use for lock perf
+
+
+class DConcurrencyTest : public benchmark::Fixture {  // state shared by the lock benchmarks
+public:
+    /**
+     * Appends 'k' (Client, OperationContext) pairs to 'clients'. The lock state of every
+     * OperationContext is swapped for a newly constructed 'LockerType'. Returns nothing.
+     */
+    template <typename LockerType>
+    void makeKClientsWithLockers(int k) {
+        clients.reserve(k);
+        for (int i = 0; i < k; ++i) {
+            auto client = getGlobalServiceContext()->makeClient(
+                str::stream() << "test client for thread " << i);
+            auto opCtx = client->makeOperationContext();
+            opCtx->swapLockState(std::make_unique<LockerType>());
+            clients.emplace_back(std::move(client), std::move(opCtx));
+        }
+    }
+
+protected:
+    std::vector<std::pair<ServiceContext::UniqueClient, ServiceContext::UniqueOperationContext>>
+        clients;  // filled by makeKClientsWithLockers(); benchmarks index it by thread_index
+    std::array<DefaultLockerImpl, kMaxPerfThreads> locker;  // indexed by state.thread_index
+};
+
+BENCHMARK_DEFINE_F(DConcurrencyTest, BM_StdMutex)(benchmark::State& state) {
+    static stdx::mutex sharedMutex;  // a single mutex used by every benchmark thread
+    for (auto keepRunning : state) {
+        // Timed region: lock the mutex; it is unlocked when 'guard' leaves scope.
+        stdx::unique_lock<stdx::mutex> guard(sharedMutex);
+    }
+}
+
+BENCHMARK_DEFINE_F(DConcurrencyTest, BM_ResourceMutexShared)(benchmark::State& state) {
+    static Lock::ResourceMutex resourceMutex("testMutex");  // shared by all benchmark threads
+    for (auto keepRunning : state) {
+        // Timed region: take the mutex through this thread's own locker; released at scope exit.
+        Lock::SharedLock sharedGuard(&locker[state.thread_index], resourceMutex);
+    }
+}
+
+BENCHMARK_DEFINE_F(DConcurrencyTest, BM_ResourceMutexExclusive)(benchmark::State& state) {
+    static Lock::ResourceMutex resourceMutex("testMutex");  // shared by all benchmark threads
+    for (auto keepRunning : state) {
+        // Timed region: take the mutex through this thread's own locker; released at scope exit.
+        Lock::ExclusiveLock exclusiveGuard(&locker[state.thread_index], resourceMutex);
+    }
+}
+
+BENCHMARK_DEFINE_F(DConcurrencyTest, BM_CollectionIntentSharedLock)(benchmark::State& state) {
+    // Only thread 0 sets up and tears down the state shared by all benchmark threads.
+    std::unique_ptr<ForceSupportsDocLocking> docLockingOverride;
+    if (state.thread_index == 0) {
+        makeKClientsWithLockers<DefaultLockerImpl>(state.threads);
+        docLockingOverride = std::make_unique<ForceSupportsDocLocking>(true);
+    }
+
+    for (auto keepRunning : state) {
+        Lock::DBLock dbLock(clients[state.thread_index].second.get(), "test", MODE_IS);
+        Lock::CollectionLock collLock(
+            clients[state.thread_index].second->lockState(), "test.coll", MODE_IS);
+    }
+
+    if (state.thread_index == 0) {
+        clients.clear();
+    }
+}
+
+BENCHMARK_DEFINE_F(DConcurrencyTest, BM_CollectionIntentExclusiveLock)(benchmark::State& state) {
+    // Only thread 0 sets up and tears down the state shared by all benchmark threads.
+    std::unique_ptr<ForceSupportsDocLocking> docLockingOverride;
+    if (state.thread_index == 0) {
+        makeKClientsWithLockers<DefaultLockerImpl>(state.threads);
+        docLockingOverride = std::make_unique<ForceSupportsDocLocking>(true);
+    }
+
+    for (auto keepRunning : state) {
+        Lock::DBLock dbLock(clients[state.thread_index].second.get(), "test", MODE_IX);
+        Lock::CollectionLock collLock(
+            clients[state.thread_index].second->lockState(), "test.coll", MODE_IX);
+    }
+
+    if (state.thread_index == 0) {
+        clients.clear();
+    }
+}
+
+BENCHMARK_DEFINE_F(DConcurrencyTest, BM_MMAPv1CollectionSharedLock)(benchmark::State& state) {
+    // Only thread 0 sets up and tears down the state shared by all benchmark threads.
+    std::unique_ptr<ForceSupportsDocLocking> docLockingOverride;
+    if (state.thread_index == 0) {
+        makeKClientsWithLockers<DefaultLockerImpl>(state.threads);
+        docLockingOverride = std::make_unique<ForceSupportsDocLocking>(false);
+    }
+
+    for (auto keepRunning : state) {
+        Lock::DBLock dbLock(clients[state.thread_index].second.get(), "test", MODE_IS);
+        Lock::CollectionLock collLock(
+            clients[state.thread_index].second->lockState(), "test.coll", MODE_S);
+    }
+
+    if (state.thread_index == 0) {
+        clients.clear();
+    }
+}
+
+BENCHMARK_DEFINE_F(DConcurrencyTest, BM_MMAPv1CollectionExclusiveLock)(benchmark::State& state) {
+    // Only thread 0 sets up and tears down the state shared by all benchmark threads.
+    std::unique_ptr<ForceSupportsDocLocking> docLockingOverride;
+    if (state.thread_index == 0) {
+        makeKClientsWithLockers<DefaultLockerImpl>(state.threads);
+        docLockingOverride = std::make_unique<ForceSupportsDocLocking>(false);
+    }
+
+    for (auto keepRunning : state) {
+        Lock::DBLock dbLock(clients[state.thread_index].second.get(), "test", MODE_IX);
+        Lock::CollectionLock collLock(
+            clients[state.thread_index].second->lockState(), "test.coll", MODE_X);
+    }
+
+    if (state.thread_index == 0) {
+        clients.clear();
+    }
+}
+
+BENCHMARK_REGISTER_F(DConcurrencyTest, BM_StdMutex)->ThreadRange(1, kMaxPerfThreads);
+// Lock::ResourceMutex acquired through Lock::SharedLock / Lock::ExclusiveLock.
+BENCHMARK_REGISTER_F(DConcurrencyTest, BM_ResourceMutexShared)->ThreadRange(1, kMaxPerfThreads);
+BENCHMARK_REGISTER_F(DConcurrencyTest, BM_ResourceMutexExclusive)->ThreadRange(1, kMaxPerfThreads);
+// Database + collection intent locks, run with ForceSupportsDocLocking(true).
+BENCHMARK_REGISTER_F(DConcurrencyTest, BM_CollectionIntentSharedLock)
+    ->ThreadRange(1, kMaxPerfThreads);
+BENCHMARK_REGISTER_F(DConcurrencyTest, BM_CollectionIntentExclusiveLock)
+    ->ThreadRange(1, kMaxPerfThreads);
+// Database intent lock + collection MODE_S / MODE_X, run with ForceSupportsDocLocking(false).
+BENCHMARK_REGISTER_F(DConcurrencyTest, BM_MMAPv1CollectionSharedLock)
+    ->ThreadRange(1, kMaxPerfThreads);
+BENCHMARK_REGISTER_F(DConcurrencyTest, BM_MMAPv1CollectionExclusiveLock)
+    ->ThreadRange(1, kMaxPerfThreads);
+
+}  // namespace
+}  // namespace mongo
diff --git a/src/mongo/db/concurrency/d_concurrency_test.cpp b/src/mongo/db/concurrency/d_concurrency_test.cpp
index b8560cd6129..81a9a15a7f8 100644
--- a/src/mongo/db/concurrency/d_concurrency_test.cpp
+++ b/src/mongo/db/concurrency/d_concurrency_test.cpp
@@ -53,9 +53,7 @@
 namespace mongo {
 namespace {
 
-const int kMaxPerfThreads = 16;    // max number of threads to use for lock perf
 const int kMaxStressThreads = 32;  // max number of threads to use for lock stress
-const int kMinPerfMillis = 30;     // min duration for reliable timing
 
 /**
  * A RAII object that instantiates a TicketHolder that limits number of allowed global lock
@@ -132,53 +130,6 @@ public:
         return result;
     }
 
-    /**
-     * Calls fn the given number of iterations, spread out over up to maxThreads threads.
-     * The threadNr passed is an integer between 0 and maxThreads exclusive. Logs timing
-     * statistics for for all power-of-two thread counts from 1 up to maxThreds.
-     */
-    void perfTest(stdx::function<void(int threadNr)> fn, int maxThreads) {
-        for (int numThreads = 1; numThreads <= maxThreads; numThreads *= 2) {
-            std::vector<stdx::thread> threads;
-
-            AtomicInt32 ready{0};
-            AtomicInt64 elapsedNanos{0};
-            AtomicInt64 timedIters{0};
-
-            for (int threadId = 0; threadId < numThreads; threadId++)
-                threads.emplace_back([&, threadId]() {
-                    // Busy-wait until everybody is ready
-                    ready.fetchAndAdd(1);
-                    while (ready.load() < numThreads) {
-                    }
-
-                    uint64_t micros = 0;
-                    int iters;
-                    // Ensure at least 16 iterations are done and at least 25 milliseconds is timed
-                    for (iters = 16; iters < (1 << 30) && micros < kMinPerfMillis * 1000;
-                         iters *= 2) {
-                        // Measure the number of loops
-                        Timer t;
-
-                        for (int i = 0; i < iters; i++)
-                            fn(threadId);
-
-                        micros = t.micros();
-                    }
-
-                    elapsedNanos.fetchAndAdd(micros * 1000);
-                    timedIters.fetchAndAdd(iters);
-                });
-
-            for (auto& thread : threads)
-                thread.join();
-
-            log() << numThreads << " threads took: "
-                  << elapsedNanos.load() / static_cast<double>(timedIters.load()) << " ns per call"
-                  << (kDebugBuild ? " (DEBUG BUILD!)" : "");
-        }
-    }
-
 private:
     ServiceContext::UniqueClient _client;
 };
@@ -1447,74 +1398,6 @@ TEST_F(DConcurrencyTestFixture, CompatibleFirstStress) {
     }
 }
 
-// These tests exercise single- and multi-threaded performance of uncontended lock acquisition. It
-// is neither practical nor useful to run them on debug builds.
-
-TEST_F(DConcurrencyTestFixture, PerformanceStdMutex) {
-    stdx::mutex mtx;
-    perfTest([&](int threadId) { stdx::unique_lock<stdx::mutex> lk(mtx); }, kMaxPerfThreads);
-}
-
-TEST_F(DConcurrencyTestFixture, PerformanceResourceMutexShared) {
-    Lock::ResourceMutex mtx("testMutex");
-    std::array<DefaultLockerImpl, kMaxPerfThreads> locker;
-    perfTest([&](int threadId) { Lock::SharedLock lk(&locker[threadId], mtx); }, kMaxPerfThreads);
-}
-
-TEST_F(DConcurrencyTestFixture, PerformanceResourceMutexExclusive) {
-    Lock::ResourceMutex mtx("testMutex");
-    std::array<DefaultLockerImpl, kMaxPerfThreads> locker;
-    perfTest([&](int threadId) { Lock::ExclusiveLock lk(&locker[threadId], mtx); },
-             kMaxPerfThreads);
-}
-
-TEST_F(DConcurrencyTestFixture, PerformanceCollectionIntentSharedLock) {
-    std::vector<std::pair<ServiceContext::UniqueClient, ServiceContext::UniqueOperationContext>>
-        clients = makeKClientsWithLockers<DefaultLockerImpl>(kMaxPerfThreads);
-    ForceSupportsDocLocking supported(true);
-    perfTest(
-        [&](int threadId) {
-            Lock::DBLock dlk(clients[threadId].second.get(), "test", MODE_IS);
-            Lock::CollectionLock clk(clients[threadId].second->lockState(), "test.coll", MODE_IS);
-        },
-        kMaxPerfThreads);
-}
-
-TEST_F(DConcurrencyTestFixture, PerformanceCollectionIntentExclusiveLock) {
-    std::vector<std::pair<ServiceContext::UniqueClient, ServiceContext::UniqueOperationContext>>
-        clients = makeKClientsWithLockers<DefaultLockerImpl>(kMaxPerfThreads);
-    ForceSupportsDocLocking supported(true);
-    perfTest(
-        [&](int threadId) {
-            Lock::DBLock dlk(clients[threadId].second.get(), "test", MODE_IX);
-            Lock::CollectionLock clk(clients[threadId].second->lockState(), "test.coll", MODE_IX);
-        },
-        kMaxPerfThreads);
-}
-
-TEST_F(DConcurrencyTestFixture, PerformanceMMAPv1CollectionSharedLock) {
-    std::vector<std::pair<ServiceContext::UniqueClient, ServiceContext::UniqueOperationContext>>
-        clients = makeKClientsWithLockers<DefaultLockerImpl>(kMaxPerfThreads);
-    ForceSupportsDocLocking supported(false);
-    perfTest(
-        [&](int threadId) {
-            Lock::DBLock dlk(clients[threadId].second.get(), "test", MODE_IS);
-            Lock::CollectionLock clk(clients[threadId].second->lockState(), "test.coll", MODE_S);
-        },
-        kMaxPerfThreads);
-}
-
-TEST_F(DConcurrencyTestFixture, PerformanceMMAPv1CollectionExclusive) {
-    std::vector<std::pair<ServiceContext::UniqueClient, ServiceContext::UniqueOperationContext>>
-        clients = makeKClientsWithLockers<DefaultLockerImpl>(kMaxPerfThreads);
-    ForceSupportsDocLocking supported(false);
-    perfTest(
-        [&](int threadId) {
-            Lock::DBLock dlk(clients[threadId].second.get(), "test", MODE_IX);
-            Lock::CollectionLock clk(clients[threadId].second->lockState(), "test.coll", MODE_X);
-        },
-        kMaxPerfThreads);
-}
 
 namespace {
 class RecoveryUnitMock : public RecoveryUnitNoop {
diff --git a/src/mongo/unittest/SConscript b/src/mongo/unittest/SConscript
index e00502d94f7..b4e137ff4b9 100644
--- a/src/mongo/unittest/SConscript
+++ b/src/mongo/unittest/SConscript
@@ -40,12 +40,13 @@ env.Library(target="integration_test_main",
 bmEnv = env.Clone()
 bmEnv.InjectThirdPartyIncludePaths(libraries=['benchmark'])
 bmEnv.Library(
-    target="benchmark_main",
+    target='benchmark_main',
     source=[
         'benchmark_main.cpp'
     ],
     LIBDEPS=[
         '$BUILD_DIR/mongo/base',
+        '$BUILD_DIR/third_party/shim_benchmark',
     ],
 )
 
@@ -54,9 +55,12 @@ env.CppUnitTest('fixture_test', 'fixture_test.cpp')
 env.CppUnitTest('temp_dir_test', 'temp_dir_test.cpp')
 
 env.Benchmark(
-    target='benchmark_test',
+    target='system_resource_canary_bm',
     source=[
-        'benchmark_test.cpp',
+        'system_resource_canary_bm.cpp'
+    ],
+    LIBDEPS=[
+        '$BUILD_DIR/mongo/util/processinfo',
     ],
 )
 
diff --git a/src/mongo/unittest/benchmark_test.cpp b/src/mongo/unittest/benchmark_test.cpp
deleted file mode 100644
index 18e0466cbb5..00000000000
--- a/src/mongo/unittest/benchmark_test.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- *    Copyright (C) 2018 MongoDB Inc.
- *
- *    This program is free software: you can redistribute it and/or  modify
- *    it under the terms of the GNU Affero General Public License, version 3,
- *    as published by the Free Software Foundation.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU Affero General Public License for more details.
- *
- *    You should have received a copy of the GNU Affero General Public License
- *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- *    As a special exception, the copyright holders give permission to link the
- *    code of portions of this program with the OpenSSL library under certain
- *    conditions as described in each individual source file and distribute
- *    linked combinations including the prograxm with the OpenSSL library. You
- *    must comply with the GNU Affero General Public License in all respects for
- *    all of the code used other than as permitted herein. If you modify file(s)
- *    with this exception, you may extend this exception to your version of the
- *    file(s), but you are not obligated to do so. If you do not wish to do so,
- *    delete this exception statement from your version. If you delete this
- *    exception statement from all source files in the program, then also delete
- *    it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include <benchmark/benchmark.h>
-
-namespace {
-
-// This is a trivial test case to sanity check that "benchmark" runs.
-// For more information on how benchmarks should be written, please refer to Google Benchmark's
-// excellent README: https://github.com/google/benchmark/blob/v1.3.0/README.md
-void BM_empty(benchmark::State& state) {
-    for (auto keepRunning : state) {
-        // The code inside this for-loop is what's being timed.
-        benchmark::DoNotOptimize(state.iterations());
-    }
-}
-
-// Register two benchmarks, one runs the "BM_empty" function in a single thread, the other runs a
-// copy per CPU core.
-BENCHMARK(BM_empty);
-BENCHMARK(BM_empty)->ThreadPerCpu();
-
-}  // namespace
diff --git a/src/mongo/unittest/system_resource_canary_bm.cpp b/src/mongo/unittest/system_resource_canary_bm.cpp
new file mode 100644
index 00000000000..cd060ccad40
--- /dev/null
+++ b/src/mongo/unittest/system_resource_canary_bm.cpp
@@ -0,0 +1,163 @@
+/**
+ *    Copyright (C) 2018 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <benchmark/benchmark.h>
+
+#include "mongo/util/assert_util.h"
+#include "mongo/util/processinfo.h"
+#include "mongo/util/time_support.h"
+
+namespace mongo {
+namespace {
+
+// This is a trivial test case to sanity check that "benchmark" runs.
+// For more information on how benchmarks should be written, please refer to Google Benchmark's
+// excellent README: https://github.com/google/benchmark/blob/v1.3.0/README.md
+void BM_Empty(benchmark::State& state) {
+    for (auto keepRunning : state) {
+        // The code inside this for-loop is what's being timed.
+        benchmark::DoNotOptimize(state.iterations());  // keeps the call from being elided
+    }
+}
+
+// Register two benchmarks, one runs the "BM_Empty" function in a single thread, the other runs a
+// copy per CPU core.
+BENCHMARK(BM_Empty);
+BENCHMARK(BM_Empty)->ThreadPerCpu();
+
+void BM_CpuLoad(benchmark::State& state) {
+    for (auto keepRunning : state) {
+        const uint64_t multiplications = 10000;  // fixed amount of work per timed iteration
+        uint64_t product = 100;
+        uint64_t sink = 0;
+        for (uint64_t step = 0; step < multiplications; ++step) {
+            benchmark::DoNotOptimize(product *= 13);  // unsigned arithmetic wraps on overflow
+        }
+        benchmark::DoNotOptimize(sink = product);
+    }
+}
+
+// This Benchmark is adapted from the `cpuload` command:
+// https://github.com/mongodb/mongo/blob/r3.7.2/src/mongo/db/commands/cpuload.cpp
+BENCHMARK(BM_CpuLoad)->Threads(1)->ThreadPerCpu();
+
+
+void BM_Sleep(benchmark::State& state) {
+    for (auto keepRunning : state) {
+        sleepmillis(100);  // presumably blocks the calling thread for ~100 ms -- confirm
+    }
+}
+
+BENCHMARK(BM_Sleep)->Threads(1)->ThreadPerCpu();
+
+
+// Unroll the pointer chase with macros: HUNDRED expands to 100 consecutive dependent loads.
+#define ONE ptrToNextLinkedListNode = reinterpret_cast<char**>(*ptrToNextLinkedListNode);
+#define FIVE ONE ONE ONE ONE ONE
+#define TEN FIVE FIVE
+#define FIFTY TEN TEN TEN TEN TEN
+#define HUNDRED FIFTY FIFTY
+
+// Despite its name, the stride is applied as a count of array elements (pointers) to skip when
+// building the list. It should ideally be >= the cache line to avoid pre-fetching side effects.
+const int kStrideBytes = 64;
+
+class CacheLatencyTest : public benchmark::Fixture {
+    // Fixture for CPU Cache and RAM latency test. Adapted from lmbench's lat_mem_rd test.
+public:
+    // Array of pointers used as a circular linked list; built and released by thread 0 only.
+    std::unique_ptr<char* []> data;
+
+    void SetUp(benchmark::State& state) override {
+        if (state.thread_index == 0) {
+            // 'data' must be null here, i.e. a previous run's TearDown() has released it.
+            invariant(data.get() == nullptr);
+            /*
+             * Create a circular list of pointers using a simple striding
+             * algorithm.
+             */
+            const int arrLength = state.range(0);
+            int counter = 0;
+
+            data = std::make_unique<char* []>(arrLength);
+
+            char** arr = data.get();
+
+            /*
+             * This access pattern corresponds to many array/matrix algorithms.
+             * It should be easily and correctly predicted by any decent hardware
+             * prefetch algorithm.
+             */
+            for (counter = 0; counter < arrLength - kStrideBytes; counter += kStrideBytes) {
+                arr[counter] = reinterpret_cast<char*>(&arr[counter + kStrideBytes]);
+            }
+            arr[counter] = reinterpret_cast<char*>(&arr[0]);  // last node links back to the first
+        }
+    }
+
+    void TearDown(benchmark::State& state) override {
+        if (state.thread_index == 0) {
+            invariant(data.get() != nullptr);  // 'data' must have been built by SetUp()
+            data.reset();
+        }
+    }
+};
+
+
+BENCHMARK_DEFINE_F(CacheLatencyTest, BM_CacheLatency)(benchmark::State& state) {
+    const size_t elementCount = state.range(0);
+    const size_t hundredsOfLoads = elementCount / (kStrideBytes * 100) + 1;
+
+    for (auto keepRunning : state) {
+        char** sink = nullptr;  // Written below so the pointer chase cannot be optimized out.
+        // The name 'ptrToNextLinkedListNode' is fixed: the ONE macro refers to it.
+        char** ptrToNextLinkedListNode = reinterpret_cast<char**>(data.get()[0]);
+        for (size_t batch = 0; batch < hundredsOfLoads; ++batch) {
+            HUNDRED;
+        }
+        benchmark::DoNotOptimize(sink = ptrToNextLinkedListNode);
+    }
+
+    // Report how many list nodes were visited so Benchmark can compute the average latency of
+    // one access, which makes results comparable across arrays of different sizes.
+    state.SetItemsProcessed(state.iterations() * hundredsOfLoads * 100);
+}
+
+BENCHMARK_REGISTER_F(CacheLatencyTest, BM_CacheLatency)
+    ->RangeMultiplier(2 * 1024)  // NOTE(review): may yield only the two range endpoints; confirm
+    // Loop over arrays of different sizes to test the L2, L3, and RAM latency.
+    ->Range(256 * 1024, 4096 * 1024)  // range(0) is the element count of the pointer array
+    ->ThreadRange(1, [] {
+        ProcessInfo pi;
+        return static_cast<int>(pi.getNumAvailableCores().value_or(pi.getNumCores()));
+    }());
+
+}  // namespace
+}  // namespace mongo
diff --git a/src/mongo/util/clock_source_bm.cpp b/src/mongo/util/clock_source_bm.cpp
index af61bc1065e..67af4407092 100644
--- a/src/mongo/util/clock_source_bm.cpp
+++ b/src/mongo/util/clock_source_bm.cpp
@@ -46,7 +46,7 @@ namespace {
  * All threads executing the benchmark use the same instance of the clock source,
  * to allow benchmarking to identify synchronization costs inside the now() method.
  */
-void BM_clockNow(benchmark::State& state) {
+void BM_ClockNow(benchmark::State& state) {
     static std::unique_ptr<ClockSource> clock;
     if (state.thread_index == 0) {
         if (state.range(0) > 0) {
@@ -59,7 +59,7 @@ void BM_clockNow(benchmark::State& state) {
         }
     }
 
-    for (auto _ : state) {
+    for (auto keepRunning : state) {
         benchmark::DoNotOptimize(clock->now());
     }
 
@@ -68,7 +68,7 @@ void BM_clockNow(benchmark::State& state) {
     }
 }
 
-BENCHMARK(BM_clockNow)
+BENCHMARK(BM_ClockNow)
     ->ThreadRange(1,
                   [] {
                       ProcessInfo pi;
author	Robert Guo <robert.guo@10gen.com>	2018-02-19 15:59:34 -0500
committer	Robert Guo <robert.guo@10gen.com>	2018-03-02 15:56:33 -0500
commit	721846a8b4b64d72338b46f91002d4a601c7dbba (patch)
tree	dc77b248f9039ad66bf0b803743d6bf2dd5c90a7 /src
parent	f3a0cf8400ad738cc15f0ed21bd00eebe972da50 (diff)
download	mongo-721846a8b4b64d72338b46f91002d4a601c7dbba.tar.gz