23 files changed, 559 insertions, 258 deletions
diff --git a/README.third_party.md b/README.third_party.md
index 86b004aa736..d731f74843e 100644
--- a/README.third_party.md
+++ b/README.third_party.md
@@ -24,7 +24,7 @@ a notice will be included in
 | [abseil-cpp]               | Apache-2.0        |                  | 070f6e47b3        |                      |                ✗                |
 | Aladdin MD5                | Zlib              |                  | Unknown           |          ✗           |                ✗                |
 | [ASIO]                     | BSL-1.0           | 1.16.1           | b0926b61b0        |                      |                ✗                |
-| [benchmark]                | Apache-2.0        | 1.5.1            | 1.5.0             |                      |                                 |
+| [benchmark]                | Apache-2.0        | 1.5.2            | 1.5.2             |                      |                                 |
 | [Boost]                    | BSL-1.0           | 1.73.0           | 1.70.0            |                      |                ✗                |
 | [fmt]                      | BSD-2-Clause      | 7.1.3            | 7.1.3             |                      |                ✗                |
 | [GPerfTools]               | BSD-3-Clause      | 2.8              | 2.8               |                      |                ✗                |
diff --git a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py
index 6bc5ba22dbf..db637793d5c 100644
--- a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py
+++ b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py
@@ -131,10 +131,23 @@ class _BenchmarkThreadsReport(object):
     """
 
     CONTEXT_FIELDS = [
-        "date", "cpu_scaling_enabled", "num_cpus", "mhz_per_cpu", "library_build_type",
-        "executable", "caches"
+        "date",
+        "num_cpus",
+        "mhz_per_cpu",
+        "library_build_type",
+        "executable",
+        "caches",
+        "cpu_scaling_enabled",
     ]
-    Context = collections.namedtuple("Context", CONTEXT_FIELDS)  # type: ignore
+
+    Context = collections.namedtuple(
+        typename="Context",
+        field_names=CONTEXT_FIELDS,
+        # We need a default for cpu_scaling_enabled, since newer
+        # google benchmark doesn't report a value if it can't make a
+        # determination.
+        defaults=["unknown"],
+    )  # type: ignore
 
     def __init__(self, context_dict):
         # `context_dict` was parsed from a json file and might have additional fields.
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 75e63cf34b1..1d142c56904 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -11564,16 +11564,12 @@ buildvariants:
   modules:
   - enterprise
   run_on:
-  - centos6-perf
+  - rhel80-medium
   expansions:
     compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars
   tasks:
   - name: compile_benchmarks
-    distros:
-    - rhel80-medium
   - name: .benchmarks
-    distros:
-    - centos6-perf
 
 - name: enterprise-rhel-80-64-bit-inmem
   display_name: Enterprise RHEL 8.0 (inMemory)
diff --git a/src/third_party/benchmark/dist/README.md b/src/third_party/benchmark/dist/README.md
index 45e41588438..41a1bdff757 100644
--- a/src/third_party/benchmark/dist/README.md
+++ b/src/third_party/benchmark/dist/README.md
@@ -1,10 +1,10 @@
 # Benchmark
+
 [![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark)
 [![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master)
 [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark)
 [![slackin](https://slackin-iqtfqnpzxd.now.sh/badge.svg)](https://slackin-iqtfqnpzxd.now.sh/)
 
-
 A library to benchmark code snippets, similar to unit tests. Example:
 
 ```c++
@@ -49,9 +49,11 @@ The following minimum versions are required to build the library:
 
 * GCC 4.8
 * Clang 3.4
-* Visual Studio 2013
+* Visual Studio 14 2015
 * Intel 2015 Update 1
 
+See [Platform-Specific Build Instructions](#platform-specific-build-instructions).
+
 ## Installation
 
 This describes the installation process using cmake. As pre-requisites, you'll
@@ -65,37 +67,40 @@ versions of build tools._
 $ git clone https://github.com/google/benchmark.git
 # Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory.
 $ git clone https://github.com/google/googletest.git benchmark/googletest
+# Go to the library root directory
+$ cd benchmark
 # Make a build directory to place the build output.
-$ mkdir build && cd build
-# Generate a Makefile with cmake.
-# Use cmake -G <generator> to generate a different file type.
-$ cmake ../benchmark
+$ cmake -E make_directory "build"
+# Generate build system files with cmake.
+$ cmake -E chdir "build" cmake -DCMAKE_BUILD_TYPE=Release ../
+# or, starting with CMake 3.13, use a simpler form:
+# cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build"
 # Build the library.
-$ make
+$ cmake --build "build" --config Release
 ```
 This builds the `benchmark` and `benchmark_main` libraries and tests.
 On a unix system, the build directory should now look something like this:
 
 ```
 /benchmark
-/build
-  /src
-    /libbenchmark.a
-    /libbenchmark_main.a
-  /test
-    ...
+  /build
+    /src
+      /libbenchmark.a
+      /libbenchmark_main.a
+    /test
+      ...
 ```
 
 Next, you can run the tests to check the build.
 
 ```bash
-$ make test
+$ cmake -E chdir "build" ctest --build-config Release
 ```
 
 If you want to install the library globally, also run:
 
 ```
-sudo make install
+sudo cmake --build "build" --config Release --target install
 ```
 
 Note that Google Benchmark requires Google Test to build and run the tests. This
@@ -112,17 +117,14 @@ to `CMAKE_ARGS`.
 ### Debug vs Release
 
 By default, benchmark builds as a debug library. You will see a warning in the
-output when this is the case. To build it as a release library instead, use:
+output when this is the case. To build it as a release library instead, add
+`-DCMAKE_BUILD_TYPE=Release` when generating the build system files, as shown
+above. The use of `--config Release` in build commands is needed to properly
+support multi-configuration tools (like Visual Studio for example) and can be
+skipped for other build systems (like Makefile).
 
-```
-cmake -DCMAKE_BUILD_TYPE=Release
-```
-
-To enable link-time optimisation, use
-
-```
-cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true
-```
+To enable link-time optimisation, also add `-DBENCHMARK_ENABLE_LTO=true` when
+generating the build system files.
 
 If you are using gcc, you might need to set `GCC_AR` and `GCC_RANLIB` cmake
 cache variables, if autodetection fails.
@@ -130,7 +132,6 @@ cache variables, if autodetection fails.
 If you are using clang, you may need to set `LLVMAR_EXECUTABLE`,
 `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables.
 
-
 ### Stable and Experimental Library Versions
 
 The main branch contains the latest stable version of the benchmarking library;
@@ -144,7 +145,9 @@ this branch. However, this branch provides no stability guarantees and reserves
 the right to change and break the API at any time.
 
 ## Usage
+
 ### Basic usage
+
 Define a function that executes the code to measure, register it as a benchmark
 function using the `BENCHMARK` macro, and ensure an appropriate `main` function
 is available:
@@ -171,14 +174,14 @@ BENCHMARK_MAIN();
 ```
 
 To run the benchmark, compile and link against the `benchmark` library
-(libbenchmark.a/.so). If you followed the build steps above, this
-library will be under the build directory you created.
+(libbenchmark.a/.so). If you followed the build steps above, this library will
+be under the build directory you created.
 
 ```bash
 # Example on linux after running the build steps above. Assumes the
 # `benchmark` and `build` directories are under the current directory.
-$ g++ -std=c++11 -isystem benchmark/include -Lbuild/src -lpthread \
-  -lbenchmark mybenchmark.cc -o mybenchmark
+$ g++ mybenchmark.cc -std=c++11 -isystem benchmark/include \
+  -Lbenchmark/build/src -lbenchmark -lpthread -o mybenchmark
 ```
 
 Alternatively, link against the `benchmark_main` library and remove
@@ -187,7 +190,29 @@ Alternatively, link against the `benchmark_main` library and remove
 The compiled executable will run all benchmarks by default. Pass the `--help`
 flag for option information or see the guide below.
 
-### Platform-specific instructions
+### Usage with CMake
+
+If using CMake, it is recommended to link against the project-provided
+`benchmark::benchmark` and `benchmark::benchmark_main` targets using
+`target_link_libraries`.
+It is possible to use ```find_package``` to import an installed version of the
+library.
+```cmake
+find_package(benchmark REQUIRED)
+```
+Alternatively, ```add_subdirectory``` will incorporate the library directly in
+to one's CMake project.
+```cmake
+add_subdirectory(benchmark)
+```
+Either way, link to the library as follows.
+```cmake
+target_link_libraries(MyTarget benchmark::benchmark)
+```
+
+## Platform Specific Build Instructions
+
+### Building with GCC
 
 When the library is built using GCC it is necessary to link with the pthread
 library due to how GCC implements `std::thread`. Failing to link to pthread will
@@ -197,8 +222,34 @@ can link to pthread by adding `-pthread` to your linker command. Note, you can
 also use `-lpthread`, but there are potential issues with ordering of command
 line parameters if you use that.
 
-If you're running benchmarks on Windows, the shlwapi library (`-lshlwapi`) is
-also required.
+### Building with Visual Studio 2015 or 2017
+
+The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following:
+
+```
+// Alternatively, can add libraries using linker options.
+#ifdef _WIN32
+#pragma comment ( lib, "Shlwapi.lib" )
+#ifdef _DEBUG
+#pragma comment ( lib, "benchmarkd.lib" )
+#else
+#pragma comment ( lib, "benchmark.lib" )
+#endif
+#endif
+```
+
+Can also use the graphical version of CMake:
+* Open `CMake GUI`.
+* Under `Where to build the binaries`, same path as source plus `build`.
+* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`.
+* Click `Configure`, `Generate`, `Open Project`.
+* If build fails, try deleting entire directory and starting again, or unticking options to build less.
+
+### Building with Intel 2015 Update 1 or Intel System Studio Update 4
+
+See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel.
+
+### Building on Solaris
 
 If you're running benchmarks on solaris, you'll want the kstat library linked in
 too (`-lkstat`).
@@ -206,15 +257,19 @@ too (`-lkstat`).
 ## User Guide
 
 ### Command Line
+
 [Output Formats](#output-formats)
 
 [Output Files](#output-files)
 
+[Running Benchmarks](#running-benchmarks)
+
 [Running a Subset of Benchmarks](#running-a-subset-of-benchmarks)
 
 [Result Comparison](#result-comparison)
 
 ### Library
+
 [Runtime and Reporting Considerations](#runtime-and-reporting-considerations)
 
 [Passing Arguments](#passing-arguments)
@@ -249,17 +304,20 @@ too (`-lkstat`).
 
 [Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling)
 
+
 <a name="output-formats" />
 
 ### Output Formats
 
 The library supports multiple output formats. Use the
-`--benchmark_format=<console|json|csv>` flag to set the format type. `console`
-is the default format.
+`--benchmark_format=<console|json|csv>` flag (or set the
+`BENCHMARK_FORMAT=<console|json|csv>` environment variable) to set
+the format type. `console` is the default format.
 
 The Console format is intended to be a human readable format. By default
 the format generates color output. Context is output on stderr and the
 tabular data on stdout. Example tabular output looks like:
+
 ```
 Benchmark                               Time(ns)    CPU(ns) Iterations
 ----------------------------------------------------------------------
@@ -273,6 +331,7 @@ The `context` attribute contains information about the run in general, including
 information about the CPU and the date.
 The `benchmarks` attribute contains a list of every benchmark run. Example json
 output looks like:
+
 ```json
 {
   "context": {
@@ -313,6 +372,7 @@ output looks like:
 
 The CSV format outputs comma-separated values. The `context` is output on stderr
 and the CSV itself on stdout. Example CSV output looks like:
+
 ```
 name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
 "BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942,
@@ -324,16 +384,31 @@ name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
 
 ### Output Files
 
-Write benchmark results to a file with the `--benchmark_out=<filename>` option.
-Specify the output format with `--benchmark_out_format={json|console|csv}`. Note that Specifying
+Write benchmark results to a file with the `--benchmark_out=<filename>` option
+(or set `BENCHMARK_OUT`). Specify the output format with
+`--benchmark_out_format={json|console|csv}` (or set
+`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that specifying
 `--benchmark_out` does not suppress the console output.
 
+<a name="running-benchmarks" />
+
+### Running Benchmarks
+
+Benchmarks are executed by running the produced binaries. Benchmark binaries,
+by default, accept options that may be specified either through their command
+line interface or by setting environment variables before execution. For every
+`--option_flag=<value>` CLI switch, a corresponding environment variable
+`OPTION_FLAG=<value>` exists and is used as the default if set (CLI switches
+always prevail). A complete list of CLI options is available by running the
+benchmark with the `--help` switch.
+
 <a name="running-a-subset-of-benchmarks" />
 
 ### Running a Subset of Benchmarks
 
-The `--benchmark_filter=<regex>` option can be used to only run the benchmarks
-which match the specified `<regex>`. For example:
+The `--benchmark_filter=<regex>` option (or `BENCHMARK_FILTER=<regex>`
+environment variable) can be used to only run the benchmarks that match
+the specified `<regex>`. For example:
 
 ```bash
 $ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32
@@ -351,7 +426,8 @@ BM_memcpy/32k       1834 ns       1837 ns     357143
 
 ### Result comparison
 
-It is possible to compare the benchmarking results. See [Additional Tooling Documentation](docs/tools.md)
+It is possible to compare the benchmarking results.
+See [Additional Tooling Documentation](docs/tools.md)
 
 <a name="runtime-and-reporting-considerations" />
 
@@ -417,8 +493,26 @@ range multiplier is changed to multiples of two.
 ```c++
 BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10);
 ```
+
 Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ].
 
+The preceding code shows a method of defining a sparse range.  The following
+example shows a method of defining a dense range. It is then used to benchmark
+the performance of `std::vector` initialization for uniformly increasing sizes.
+
+```c++
+static void BM_DenseRange(benchmark::State& state) {
+  for(auto _ : state) {
+    std::vector<int> v(state.range(0), state.range(0));
+    benchmark::DoNotOptimize(v.data());
+    benchmark::ClobberMemory();
+  }
+}
+BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128);
+```
+
+Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ].
+
 You might have a benchmark that depends on two or more inputs. For example, the
 following code defines a family of benchmarks for measuring the speed of set
 insertion.
@@ -454,6 +548,29 @@ pair.
 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
 ```
 
+Some benchmarks may require specific argument values that cannot be expressed
+with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a
+benchmark input for each combination in the product of the supplied vectors.
+
+```c++
+BENCHMARK(BM_SetInsert)
+    ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}})
+// would generate the same benchmark arguments as
+BENCHMARK(BM_SetInsert)
+    ->Args({1<<10, 20})
+    ->Args({3<<10, 20})
+    ->Args({8<<10, 20})
+    ->Args({3<<10, 40})
+    ->Args({8<<10, 40})
+    ->Args({1<<10, 40})
+    ->Args({1<<10, 60})
+    ->Args({3<<10, 60})
+    ->Args({8<<10, 60})
+    ->Args({1<<10, 80})
+    ->Args({3<<10, 80})
+    ->Args({8<<10, 80});
+```
+
 For more complex patterns of inputs, passing a custom function to `Apply` allows
 programmatic specification of an arbitrary set of arguments on which to run the
 benchmark. The following example enumerates a dense range on one parameter,
@@ -486,6 +603,7 @@ void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
 // the specified values to `extra_args`.
 BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
 ```
+
 Note that elements of `...args` may refer to global variables. Users should
 avoid modifying global state inside of a benchmark.
 
@@ -523,7 +641,7 @@ that might be used to customize high-order term calculation.
 
 ```c++
 BENCHMARK(BM_StringCompare)->RangeMultiplier(2)
-    ->Range(1<<10, 1<<18)->Complexity([](int64_t n)->double{return n; });
+    ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; });
 ```
 
 <a name="templated-benchmarks" />
@@ -610,6 +728,7 @@ Also you can create templated fixture by using the following macros:
 * `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)`
 
 For example:
+
 ```c++
 template<typename T>
 class MyFixture : public benchmark::Fixture {};
@@ -659,9 +778,9 @@ the resulting sum is the value which will be shown for the benchmark.
 
 The `Counter` constructor accepts three parameters: the value as a `double`
 ; a bit flag which allows you to show counters as rates, and/or as per-thread
-iteration, and/or as per-thread averages, and/or iteration invariants;
-and a flag specifying the 'unit' - i.e. is 1k a 1000 (default,
-`benchmark::Counter::OneK::kIs1000`), or 1024
+iteration, and/or as per-thread averages, and/or iteration invariants,
+and/or finally inverting the result; and a flag specifying the 'unit' - i.e.
+is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024
 (`benchmark::Counter::OneK::kIs1024`)?
 
 ```c++
@@ -670,8 +789,14 @@ and a flag specifying the 'unit' - i.e. is 1k a 1000 (default,
 
   // Set the counter as a rate. It will be presented divided
   // by the duration of the benchmark.
+  // Meaning: per one second, how many 'foo's are processed?
   state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate);
 
+  // Set the counter as a rate. It will be presented divided
+  // by the duration of the benchmark, and the result inverted.
+  // Meaning: how many seconds does it take to process one 'foo'?
+  state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
+
   // Set the counter as a thread-average quantity. It will
   // be presented divided by the number of threads.
   state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads);
@@ -697,7 +822,7 @@ When you're compiling in C++11 mode or later you can use `insert()` with
 
 #### Counter Reporting
 
-When using the console reporter, by default, user counters are are printed at
+When using the console reporter, by default, user counters are printed at
 the end after the table, the same way as ``bytes_processed`` and
 ``items_processed``. This is best for cases in which there are few counters,
 or where there are only a couple of lines per benchmark. Here's an example of
@@ -758,6 +883,7 @@ BM_CalculatePiRange/256k   2434095 ns    2434186 ns        288 3.1416
 BM_CalculatePiRange/1024k  9721140 ns    9721413 ns         71 3.14159
 BM_CalculatePi/threads:8      2255 ns       9943 ns      70936
 ```
+
 Note above the additional header printed when the benchmark changes from
 ``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does
 not have the same counter set as ``BM_UserCounter``.
@@ -818,7 +944,7 @@ static void MyMain(int size) {
 
 static void BM_OpenMP(benchmark::State& state) {
   for (auto _ : state)
-    MyMain(state.range(0);
+    MyMain(state.range(0));
 }
 
 // Measure the time spent by the main thread, use it to decide for how long to
@@ -849,7 +975,7 @@ BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime();
 Normally, the entire duration of the work loop (`for (auto _ : state) {}`)
 is measured. But sometimes, it is necessary to do some work inside of
 that loop, every iteration, but without counting that time to the benchmark time.
-That is possible, althought it is not recommended, since it has high overhead.
+That is possible, although it is not recommended, since it has high overhead.
 
 ```c++
 static void BM_SetInsert_With_Timer_Control(benchmark::State& state) {
@@ -895,7 +1021,7 @@ static void BM_ManualTiming(benchmark::State& state) {
     auto start = std::chrono::high_resolution_clock::now();
     // Simulate some useful workload with a sleep
     std::this_thread::sleep_for(sleep_duration);
-    auto end   = std::chrono::high_resolution_clock::now();
+    auto end = std::chrono::high_resolution_clock::now();
 
     auto elapsed_seconds =
       std::chrono::duration_cast<std::chrono::duration<double>>(
@@ -1077,7 +1203,9 @@ Users must explicitly exit the loop, otherwise all iterations will be performed.
 Users may explicitly return to exit the benchmark immediately.
 
 The `SkipWithError(...)` function may be used at any point within the benchmark,
-including before and after the benchmark loop.
+including before and after the benchmark loop. Moreover, if `SkipWithError(...)`
+has been used, it is not required to reach the benchmark loop and one may return
+from the benchmark function early.
 
 For example:
 
@@ -1085,24 +1213,32 @@ For example:
 static void BM_test(benchmark::State& state) {
   auto resource = GetResource();
   if (!resource.good()) {
-      state.SkipWithError("Resource is not good!");
-      // KeepRunning() loop will not be entered.
+    state.SkipWithError("Resource is not good!");
+    // KeepRunning() loop will not be entered.
   }
-  for (state.KeepRunning()) {
-      auto data = resource.read_data();
-      if (!resource.good()) {
-        state.SkipWithError("Failed to read data!");
-        break; // Needed to skip the rest of the iteration.
-     }
-     do_stuff(data);
+  while (state.KeepRunning()) {
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break; // Needed to skip the rest of the iteration.
+    }
+    do_stuff(data);
   }
 }
 
 static void BM_test_ranged_fo(benchmark::State & state) {
-  state.SkipWithError("test will not be entered");
+  auto resource = GetResource();
+  if (!resource.good()) {
+    state.SkipWithError("Resource is not good!");
+    return; // Early return is allowed when SkipWithError() has been used.
+  }
   for (auto _ : state) {
-    state.SkipWithError("Failed!");
-    break; // REQUIRED to prevent all further iterations.
+    auto data = resource.read_data();
+    if (!resource.good()) {
+      state.SkipWithError("Failed to read data!");
+      break; // REQUIRED to prevent all further iterations.
+    }
+    do_stuff(data);
   }
 }
 ```
@@ -1167,11 +1303,15 @@ the benchmark loop should be preferred.
 <a name="disabling-cpu-frequency-scaling" />
 
 ### Disabling CPU Frequency Scaling
+
 If you see this error:
+
 ```
 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
 ```
+
 you might want to disable the CPU frequency scaling while running the benchmark:
+
 ```bash
 sudo cpupower frequency-set --governor performance
 ./mybench
diff --git a/src/third_party/benchmark/dist/include/benchmark/benchmark.h b/src/third_party/benchmark/dist/include/benchmark/benchmark.h
index 4f40501596e..b906352849c 100644
--- a/src/third_party/benchmark/dist/include/benchmark/benchmark.h
+++ b/src/third_party/benchmark/dist/include/benchmark/benchmark.h
@@ -176,6 +176,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
 #include <map>
 #include <set>
 #include <string>
+#include <utility>
 #include <vector>
 
 #if defined(BENCHMARK_HAS_CXX11)
@@ -368,7 +369,10 @@ class Counter {
     // It will be presented divided by the number of iterations.
     kAvgIterations = 1U << 3U,
     // Mark the counter as a iteration-average rate. See above.
-    kAvgIterationsRate = kIsRate | kAvgIterations
+    kAvgIterationsRate = kIsRate | kAvgIterations,
+
+    // In the end, invert the result. This is always done last!
+    kInvert = 1U << 31U
   };
 
   enum OneK {
@@ -538,6 +542,9 @@ class State {
   // responsibility to exit the scope as needed.
   void SkipWithError(const char* msg);
 
+  // Returns true if an error has been reported with 'SkipWithError(...)'.
+  bool error_occurred() const { return error_occurred_; }
+
   // REQUIRES: called exactly once per iteration of the benchmarking loop.
   // Set the manually measured time for this benchmark iteration, which
   // is used instead of automatically measured time if UseManualTime() was
@@ -574,7 +581,7 @@ class State {
   void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
 
   BENCHMARK_ALWAYS_INLINE
-  int64_t complexity_length_n() { return complexity_n_; }
+  int64_t complexity_length_n() const { return complexity_n_; }
 
   // If this routine is called with items > 0, then an items/s
   // label is printed on the benchmark report line for the currently
@@ -821,6 +828,11 @@ class Benchmark {
   // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
   Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
 
+  // Run this benchmark once for each combination of values in the (cartesian)
+  // product of the supplied argument lists.
+  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
+  Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);
+
   // Equivalent to ArgNames({name})
   Benchmark* ArgName(const std::string& name);
 
@@ -1291,10 +1303,16 @@ struct CPUInfo {
     int num_sharing;
   };
 
+  enum Scaling {
+    UNKNOWN,
+    ENABLED,
+    DISABLED
+  };
+
   int num_cpus;
   double cycles_per_second;
   std::vector<CacheInfo> caches;
-  bool scaling_enabled;
+  Scaling scaling;
   std::vector<double> load_avg;
 
   static const CPUInfo& Get();
diff --git a/src/third_party/benchmark/dist/src/benchmark.cc b/src/third_party/benchmark/dist/src/benchmark.cc
index 29bfa3512f9..1c049f28844 100644
--- a/src/third_party/benchmark/dist/src/benchmark.cc
+++ b/src/third_party/benchmark/dist/src/benchmark.cc
@@ -51,66 +51,60 @@
 #include "thread_manager.h"
 #include "thread_timer.h"
 
-DEFINE_bool(benchmark_list_tests, false,
-            "Print a list of benchmarks. This option overrides all other "
-            "options.");
-
-DEFINE_string(benchmark_filter, ".",
-              "A regular expression that specifies the set of benchmarks "
-              "to execute.  If this flag is empty, or if this flag is the "
-              "string \"all\", all benchmarks linked into the binary are "
-              "run.");
-
-DEFINE_double(benchmark_min_time, 0.5,
-              "Minimum number of seconds we should run benchmark before "
-              "results are considered significant.  For cpu-time based "
-              "tests, this is the lower bound on the total cpu time "
-              "used by all threads that make up the test.  For real-time "
-              "based tests, this is the lower bound on the elapsed time "
-              "of the benchmark execution, regardless of number of "
-              "threads.");
-
-DEFINE_int32(benchmark_repetitions, 1,
-             "The number of runs of each benchmark. If greater than 1, the "
-             "mean and standard deviation of the runs will be reported.");
-
-DEFINE_bool(
-    benchmark_report_aggregates_only, false,
-    "Report the result of each benchmark repetitions. When 'true' is specified "
-    "only the mean, standard deviation, and other statistics are reported for "
-    "repeated benchmarks. Affects all reporters.");
-
-DEFINE_bool(
-    benchmark_display_aggregates_only, false,
-    "Display the result of each benchmark repetitions. When 'true' is "
-    "specified only the mean, standard deviation, and other statistics are "
-    "displayed for repeated benchmarks. Unlike "
-    "benchmark_report_aggregates_only, only affects the display reporter, but "
-    "*NOT* file reporter, which will still contain all the output.");
-
-DEFINE_string(benchmark_format, "console",
-              "The format to use for console output. Valid values are "
-              "'console', 'json', or 'csv'.");
-
-DEFINE_string(benchmark_out_format, "json",
-              "The format to use for file output. Valid values are "
-              "'console', 'json', or 'csv'.");
-
-DEFINE_string(benchmark_out, "", "The file to write additional output to");
-
-DEFINE_string(benchmark_color, "auto",
-              "Whether to use colors in the output.  Valid values: "
-              "'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use "
-              "colors if the output is being sent to a terminal and the TERM "
-              "environment variable is set to a terminal type that supports "
-              "colors.");
-
-DEFINE_bool(benchmark_counters_tabular, false,
-            "Whether to use tabular format when printing user counters to "
-            "the console.  Valid values: 'true'/'yes'/1, 'false'/'no'/0."
-            "Defaults to false.");
-
-DEFINE_int32(v, 0, "The level of verbose logging to output");
+// Print a list of benchmarks. This option overrides all other options.
+DEFINE_bool(benchmark_list_tests, false);
+
+// A regular expression that specifies the set of benchmarks to execute.  If
+// this flag is empty, or if this flag is the string "all", all benchmarks
+// linked into the binary are run.
+DEFINE_string(benchmark_filter, ".");
+
+// Minimum number of seconds we should run benchmark before results are
+// considered significant.  For cpu-time based tests, this is the lower bound
+// on the total cpu time used by all threads that make up the test.  For
+// real-time based tests, this is the lower bound on the elapsed time of the
+// benchmark execution, regardless of number of threads.
+DEFINE_double(benchmark_min_time, 0.5);
+
+// The number of runs of each benchmark. If greater than 1, the mean and
+// standard deviation of the runs will be reported.
+DEFINE_int32(benchmark_repetitions, 1);
+
+// Report the result of each benchmark repetitions. When 'true' is specified
+// only the mean, standard deviation, and other statistics are reported for
+// repeated benchmarks. Affects all reporters.
+DEFINE_bool(benchmark_report_aggregates_only, false);
+
+// Display the result of each benchmark repetitions. When 'true' is specified
+// only the mean, standard deviation, and other statistics are displayed for
+// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects
+// the display reporter, but *NOT* the file reporter, which will still contain
+// all the output.
+DEFINE_bool(benchmark_display_aggregates_only, false);
+
+// The format to use for console output.
+// Valid values are 'console', 'json', or 'csv'.
+DEFINE_string(benchmark_format, "console");
+
+// The format to use for file output.
+// Valid values are 'console', 'json', or 'csv'.
+DEFINE_string(benchmark_out_format, "json");
+
+// The file to write additional output to.
+DEFINE_string(benchmark_out, "");
+
+// Whether to use colors in the output.  Valid values:
+// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if
+// the output is being sent to a terminal and the TERM environment variable is
+// set to a terminal type that supports colors.
+DEFINE_string(benchmark_color, "auto");
+
+// Whether to use tabular format when printing user counters to the console.
+// Valid values: 'true'/'yes'/1, 'false'/'no'/0.  Defaults to false.
+DEFINE_bool(benchmark_counters_tabular, false);
+
+// The level of verbose logging to output
+DEFINE_int32(v, 0);
 
 namespace benchmark {
 
@@ -148,7 +142,7 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
   // which must be suppressed.
 #if defined(__INTEL_COMPILER)
 #pragma warning push
-#pragma warning(disable:1875)
+#pragma warning(disable : 1875)
 #elif defined(__GNUC__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Winvalid-offsetof"
@@ -289,6 +283,13 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
   flushStreams(file_reporter);
 }
 
+// Disable deprecated warnings temporarily because we need to reference
+// CSVReporter but don't want to trigger -Werror=deprecated-declarations.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
 std::unique_ptr<BenchmarkReporter> CreateReporter(
     std::string const& name, ConsoleReporter::OutputOptions output_opts) {
   typedef std::unique_ptr<BenchmarkReporter> PtrType;
@@ -304,6 +305,10 @@ std::unique_ptr<BenchmarkReporter> CreateReporter(
   }
 }
 
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
 }  // end namespace
 
 bool IsZero(double n) {
@@ -312,7 +317,7 @@ bool IsZero(double n) {
 
 ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
   int output_opts = ConsoleReporter::OO_Defaults;
-  auto is_benchmark_color = [force_no_color] () -> bool {
+  auto is_benchmark_color = [force_no_color]() -> bool {
     if (force_no_color) {
       return false;
     }
@@ -430,7 +435,7 @@ void ParseCommandLineFlags(int* argc, char** argv) {
   using namespace benchmark;
   BenchmarkReporter::Context::executable_name =
       (argc && *argc > 0) ? argv[0] : "unknown";
-  for (int i = 1; i < *argc; ++i) {
+  for (int i = 1; argc && i < *argc; ++i) {
     if (ParseBoolFlag(argv[i], "benchmark_list_tests",
                       &FLAGS_benchmark_list_tests) ||
         ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
diff --git a/src/third_party/benchmark/dist/src/benchmark_register.cc b/src/third_party/benchmark/dist/src/benchmark_register.cc
index 6696c382b80..65d9944f4f9 100644
--- a/src/third_party/benchmark/dist/src/benchmark_register.cc
+++ b/src/third_party/benchmark/dist/src/benchmark_register.cc
@@ -31,10 +31,13 @@
 #include <fstream>
 #include <iostream>
 #include <memory>
+#include <numeric>
 #include <sstream>
 #include <thread>
 
+#ifndef __STDC_FORMAT_MACROS
 #define __STDC_FORMAT_MACROS
+#endif
 #include <inttypes.h>
 
 #include "benchmark/benchmark.h"
@@ -301,33 +304,41 @@ Benchmark* Benchmark::Ranges(
     const std::vector<std::pair<int64_t, int64_t>>& ranges) {
   CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
   std::vector<std::vector<int64_t>> arglists(ranges.size());
-  std::size_t total = 1;
   for (std::size_t i = 0; i < ranges.size(); i++) {
     AddRange(&arglists[i], ranges[i].first, ranges[i].second,
              range_multiplier_);
-    total *= arglists[i].size();
   }
 
-  std::vector<std::size_t> ctr(arglists.size(), 0);
-
-  for (std::size_t i = 0; i < total; i++) {
-    std::vector<int64_t> tmp;
-    tmp.reserve(arglists.size());
+  ArgsProduct(arglists);
 
-    for (std::size_t j = 0; j < arglists.size(); j++) {
-      tmp.push_back(arglists[j].at(ctr[j]));
-    }
+  return this;
+}
 
-    args_.push_back(std::move(tmp));
+Benchmark* Benchmark::ArgsProduct(
+    const std::vector<std::vector<int64_t>>& arglists) {
+  CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
 
-    for (std::size_t j = 0; j < arglists.size(); j++) {
-      if (ctr[j] + 1 < arglists[j].size()) {
-        ++ctr[j];
-        break;
-      }
-      ctr[j] = 0;
+  std::vector<std::size_t> indices(arglists.size());
+  const std::size_t total = std::accumulate(
+      std::begin(arglists), std::end(arglists), std::size_t{1},
+      [](const std::size_t res, const std::vector<int64_t>& arglist) {
+        return res * arglist.size();
+      });
+  std::vector<int64_t> args;
+  args.reserve(arglists.size());
+  for (std::size_t i = 0; i < total; i++) {
+    for (std::size_t arg = 0; arg < arglists.size(); arg++) {
+      args.push_back(arglists[arg][indices[arg]]);
     }
+    args_.push_back(args);
+    args.clear();
+    // Odometer step; the bounded loop is a no-op when arglists is empty.
+    for (std::size_t arg = 0; arg < arglists.size(); arg++) {
+      indices[arg] = (indices[arg] + 1) % arglists[arg].size();
+      if (indices[arg] != 0) break;
+    }
   }
+
   return this;
 }
 
diff --git a/src/third_party/benchmark/dist/src/benchmark_runner.cc b/src/third_party/benchmark/dist/src/benchmark_runner.cc
index 0bae6a545ef..7bc6b6329ef 100644
--- a/src/third_party/benchmark/dist/src/benchmark_runner.cc
+++ b/src/third_party/benchmark/dist/src/benchmark_runner.cc
@@ -117,7 +117,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
           ? internal::ThreadTimer::CreateProcessCpuTime()
           : internal::ThreadTimer::Create());
   State st = b->Run(iters, thread_id, &timer, manager);
-  CHECK(st.iterations() >= st.max_iterations)
+  CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
       << "Benchmark returned before State::KeepRunning() returned false!";
   {
     MutexLock l(manager->GetBenchmarkMutex());
@@ -263,8 +263,9 @@ class BenchmarkRunner {
     if (multiplier <= 1.0) multiplier = 2.0;
 
     // So what seems to be the sufficiently-large iteration count? Round up.
-    const IterationCount max_next_iters =
-        0.5 + std::max(multiplier * i.iters, i.iters + 1.0);
+    const IterationCount max_next_iters = static_cast<IterationCount>(
+        std::lround(std::max(multiplier * static_cast<double>(i.iters),
+                             static_cast<double>(i.iters) + 1.0)));
     // But we do have *some* sanity limits though..
     const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
 
diff --git a/src/third_party/benchmark/dist/src/commandlineflags.cc b/src/third_party/benchmark/dist/src/commandlineflags.cc
index 6bd65c5ae70..0648fe3a06e 100644
--- a/src/third_party/benchmark/dist/src/commandlineflags.cc
+++ b/src/third_party/benchmark/dist/src/commandlineflags.cc
@@ -14,6 +14,7 @@
 
 #include "commandlineflags.h"
 
+#include <algorithm>
 #include <cctype>
 #include <cstdlib>
 #include <cstring>
@@ -87,49 +88,45 @@ static std::string FlagToEnvVar(const char* flag) {
   for (size_t i = 0; i != flag_str.length(); ++i)
     env_var += static_cast<char>(::toupper(flag_str.c_str()[i]));
 
-  return "BENCHMARK_" + env_var;
+  return env_var;
 }
 
 }  // namespace
 
-// Reads and returns the Boolean environment variable corresponding to
-// the given flag; if it's not set, returns default_value.
-//
-// The value is considered true iff it's not "0".
-bool BoolFromEnv(const char* flag, bool default_value) {
+bool BoolFromEnv(const char* flag, bool default_val) {
   const std::string env_var = FlagToEnvVar(flag);
-  const char* const string_value = getenv(env_var.c_str());
-  return string_value == nullptr ? default_value
-                                 : strcmp(string_value, "0") != 0;
+  const char* const value_str = getenv(env_var.c_str());
+  return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str);
 }
 
-// Reads and returns a 32-bit integer stored in the environment
-// variable corresponding to the given flag; if it isn't set or
-// doesn't represent a valid 32-bit integer, returns default_value.
-int32_t Int32FromEnv(const char* flag, int32_t default_value) {
+int32_t Int32FromEnv(const char* flag, int32_t default_val) {
   const std::string env_var = FlagToEnvVar(flag);
-  const char* const string_value = getenv(env_var.c_str());
-  if (string_value == nullptr) {
-    // The environment variable is not set.
-    return default_value;
+  const char* const value_str = getenv(env_var.c_str());
+  int32_t value = default_val;
+  if (value_str == nullptr ||
+      !ParseInt32(std::string("Environment variable ") + env_var, value_str,
+                  &value)) {
+    return default_val;
   }
+  return value;
+}
 
-  int32_t result = default_value;
-  if (!ParseInt32(std::string("Environment variable ") + env_var, string_value,
-                  &result)) {
-    std::cout << "The default value " << default_value << " is used.\n";
-    return default_value;
+double DoubleFromEnv(const char* flag, double default_val) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const value_str = getenv(env_var.c_str());
+  double value = default_val;
+  if (value_str == nullptr ||
+      !ParseDouble(std::string("Environment variable ") + env_var, value_str,
+                   &value)) {
+    return default_val;
   }
-
-  return result;
+  return value;
 }
 
-// Reads and returns the string environment variable corresponding to
-// the given flag; if it's not set, returns default_value.
-const char* StringFromEnv(const char* flag, const char* default_value) {
+const char* StringFromEnv(const char* flag, const char* default_val) {
   const std::string env_var = FlagToEnvVar(flag);
   const char* const value = getenv(env_var.c_str());
-  return value == nullptr ? default_value : value;
+  return value == nullptr ? default_val : value;
 }
 
 // Parses a string as a command line flag.  The string should have
@@ -214,9 +211,18 @@ bool IsFlag(const char* str, const char* flag) {
 }
 
 bool IsTruthyFlagValue(const std::string& value) {
   if (value.empty()) return true;
-  char ch = value[0];
-  return isalnum(ch) &&
-         !(ch == '0' || ch == 'f' || ch == 'F' || ch == 'n' || ch == 'N');
+  if (value.size() == 1) {
+    // <cctype> needs unsigned char values; plain char may be negative.
+    const unsigned char v = static_cast<unsigned char>(value[0]);
+    return isalnum(v) &&
+           !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
+  }
+  std::string value_lower(value);
+  std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
+                 [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+  return !(value_lower == "false" || value_lower == "no" ||
+           value_lower == "off");
 }
+
 }  // end namespace benchmark
diff --git a/src/third_party/benchmark/dist/src/commandlineflags.h b/src/third_party/benchmark/dist/src/commandlineflags.h
index 5eaea82a59b..3a1f6a8dbc9 100644
--- a/src/third_party/benchmark/dist/src/commandlineflags.h
+++ b/src/third_party/benchmark/dist/src/commandlineflags.h
@@ -10,23 +10,51 @@
 // Macros for declaring flags.
 #define DECLARE_bool(name) extern bool FLAG(name)
 #define DECLARE_int32(name) extern int32_t FLAG(name)
-#define DECLARE_int64(name) extern int64_t FLAG(name)
 #define DECLARE_double(name) extern double FLAG(name)
 #define DECLARE_string(name) extern std::string FLAG(name)
 
 // Macros for defining flags.
-#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
-#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
-#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
-#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
-#define DEFINE_string(name, default_val, doc) \
-  std::string FLAG(name) = (default_val)
+#define DEFINE_bool(name, default_val)            \
+  bool FLAG(name) =                               \
+    benchmark::BoolFromEnv(#name, default_val)
+#define DEFINE_int32(name, default_val)           \
+  int32_t FLAG(name) =                            \
+    benchmark::Int32FromEnv(#name, default_val)
+#define DEFINE_double(name, default_val)          \
+  double FLAG(name) =                             \
+    benchmark::DoubleFromEnv(#name, default_val)
+#define DEFINE_string(name, default_val)          \
+  std::string FLAG(name) =                        \
+    benchmark::StringFromEnv(#name, default_val)
 
 namespace benchmark {
-// Parses a bool/Int32/string from the environment variable
-// corresponding to the given Google Test flag.
+
+// Parses a bool from the environment variable
+// corresponding to the given flag.
+//
+// If the variable exists, returns IsTruthyFlagValue() value;  if not,
+// returns the given default value.
 bool BoolFromEnv(const char* flag, bool default_val);
+
+// Parses an Int32 from the environment variable
+// corresponding to the given flag.
+//
+// If the variable exists and ParseInt32() accepts it, returns the parsed
+// value; otherwise returns the given default value.
 int32_t Int32FromEnv(const char* flag, int32_t default_val);
+
+// Parses a double from the environment variable
+// corresponding to the given flag.
+//
+// If the variable exists and ParseDouble() accepts it, returns the parsed
+// value; otherwise returns the given default value.
+double DoubleFromEnv(const char* flag, double default_val);
+
+// Parses a string from the environment variable
+// corresponding to the given flag.
+//
+// If variable exists, returns its value;  if not, returns
+// the given default value.
 const char* StringFromEnv(const char* flag, const char* default_val);
 
 // Parses a string for a bool flag, in the form of either
@@ -65,9 +93,11 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value);
 bool IsFlag(const char* str, const char* flag);
 
 // Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or
-// some non-alphanumeric character. As a special case, also returns true if
-// value is the empty string.
+// some non-alphanumeric character (checked for single-character values only).
+// Longer values are false only if they match 'no', 'false' or 'off'
+// (case-insensitive). As a special case, returns true for the empty string.
 bool IsTruthyFlagValue(const std::string& value);
+
 }  // end namespace benchmark
 
 #endif  // BENCHMARK_COMMANDLINEFLAGS_H_
diff --git a/src/third_party/benchmark/dist/src/console_reporter.cc b/src/third_party/benchmark/dist/src/console_reporter.cc
index cc8ae276f6b..6fd764525e8 100644
--- a/src/third_party/benchmark/dist/src/console_reporter.cc
+++ b/src/third_party/benchmark/dist/src/console_reporter.cc
@@ -12,21 +12,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "benchmark/benchmark.h"
-#include "complexity.h"
-#include "counter.h"
-
 #include <algorithm>
 #include <cstdint>
 #include <cstdio>
+#include <cstring>
 #include <iostream>
 #include <string>
 #include <tuple>
 #include <vector>
 
+#include "benchmark/benchmark.h"
 #include "check.h"
 #include "colorprint.h"
 #include "commandlineflags.h"
+#include "complexity.h"
+#include "counter.h"
 #include "internal_macros.h"
 #include "string_util.h"
 #include "timers.h"
@@ -156,16 +156,14 @@ void ConsoleReporter::PrintRunData(const Run& result) {
     const std::size_t cNameLen = std::max(std::string::size_type(10),
                                           c.first.length());
     auto const& s = HumanReadableNumber(c.second.value, c.second.oneK);
+    const char* unit = "";
+    if (c.second.flags & Counter::kIsRate)
+      unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
     if (output_options_ & OO_Tabular) {
-      if (c.second.flags & Counter::kIsRate) {
-        printer(Out, COLOR_DEFAULT, " %*s/s", cNameLen - 2, s.c_str());
-      } else {
-        printer(Out, COLOR_DEFAULT, " %*s", cNameLen, s.c_str());
-      }
-    } else {
-      const char* unit = (c.second.flags & Counter::kIsRate) ? "/s" : "";
-      printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(),
+      printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(),
               unit);
+    } else {
+      printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), unit);
     }
   }
 
diff --git a/src/third_party/benchmark/dist/src/counter.cc b/src/third_party/benchmark/dist/src/counter.cc
index c248ea110bc..cf5b78ee3ac 100644
--- a/src/third_party/benchmark/dist/src/counter.cc
+++ b/src/third_party/benchmark/dist/src/counter.cc
@@ -32,6 +32,10 @@ double Finish(Counter const& c, IterationCount iterations, double cpu_time,
   if (c.flags & Counter::kAvgIterations) {
     v /= iterations;
   }
+
+  if (c.flags & Counter::kInvert) {  // Invert is *always* last.
+    v = 1.0 / v;
+  }
   return v;
 }
 
diff --git a/src/third_party/benchmark/dist/src/counter.h b/src/third_party/benchmark/dist/src/counter.h
index 1ad46d4940e..1f5a58e31f0 100644
--- a/src/third_party/benchmark/dist/src/counter.h
+++ b/src/third_party/benchmark/dist/src/counter.h
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#ifndef BENCHMARK_COUNTER_H_
+#define BENCHMARK_COUNTER_H_
+
 #include "benchmark/benchmark.h"
 
 namespace benchmark {
@@ -25,3 +28,5 @@ bool SameNames(UserCounters const& l, UserCounters const& r);
 }  // end namespace internal
 
 }  // end namespace benchmark
+
+#endif  // BENCHMARK_COUNTER_H_
diff --git a/src/third_party/benchmark/dist/src/cycleclock.h b/src/third_party/benchmark/dist/src/cycleclock.h
index f5e37b011b9..179c67cd614 100644
--- a/src/third_party/benchmark/dist/src/cycleclock.h
+++ b/src/third_party/benchmark/dist/src/cycleclock.h
@@ -84,13 +84,21 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   return (high << 32) | low;
 #elif defined(__powerpc__) || defined(__ppc__)
   // This returns a time-base, which is not always precisely a cycle-count.
-  int64_t tbl, tbu0, tbu1;
-  asm("mftbu %0" : "=r"(tbu0));
-  asm("mftb  %0" : "=r"(tbl));
-  asm("mftbu %0" : "=r"(tbu1));
-  tbl &= -static_cast<int64_t>(tbu0 == tbu1);
-  // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
-  return (tbu1 << 32) | tbl;
+#if defined(__powerpc64__) || defined(__ppc64__)
+  int64_t tb;
+  asm volatile("mfspr %0, 268" : "=r"(tb));
+  return tb;
+#else
+  uint32_t tbl, tbu0, tbu1;
+  asm volatile(
+      "mftbu %0\n"
+      "mftbl %1\n"
+      "mftbu %2"
+      : "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
+  tbl &= -static_cast<int32_t>(tbu0 == tbu1);
+  // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is no longer needed)
+  return (static_cast<uint64_t>(tbu1) << 32) | tbl;
+#endif
 #elif defined(__sparc__)
   int64_t tick;
   asm(".byte 0x83, 0x41, 0x00, 0x00");
@@ -164,6 +172,27 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   uint64_t tsc;
   asm("stck %0" : "=Q"(tsc) : : "cc");
   return tsc;
+#elif defined(__riscv) // RISC-V
+  // Use RDCYCLE (and RDCYCLEH on riscv32)
+#if __riscv_xlen == 32
+  uint32_t cycles_lo, cycles_hi0, cycles_hi1;
+  // This asm also includes the PowerPC overflow handling strategy, as above.
+  // Implemented in assembly because Clang insisted on branching.
+  asm volatile(
+      "rdcycleh %0\n"
+      "rdcycle %1\n"
+      "rdcycleh %2\n"
+      "sub %0, %0, %2\n"
+      "seqz %0, %0\n"
+      "sub %0, zero, %0\n"
+      "and %1, %1, %0\n"
+      : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
+  return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
+#else
+  uint64_t cycles;
+  asm volatile("rdcycle %0" : "=r"(cycles));
+  return cycles;
+#endif
 #else
 // The soft failover to a generic implementation is automatic only for ARM.
 // For other platforms the developer is expected to make an attempt to create
diff --git a/src/third_party/benchmark/dist/src/json_reporter.cc b/src/third_party/benchmark/dist/src/json_reporter.cc
index 0495d96688c..54f3ecfdbc4 100644
--- a/src/third_party/benchmark/dist/src/json_reporter.cc
+++ b/src/third_party/benchmark/dist/src/json_reporter.cc
@@ -92,7 +92,7 @@ std::string FormatKV(std::string const& key, double value) {
   return ss.str();
 }
 
-int64_t RoundDouble(double v) { return static_cast<int64_t>(v + 0.5); }
+int64_t RoundDouble(double v) { return std::llround(v); }
 
 }  // end namespace
 
@@ -134,8 +134,10 @@ bool JSONReporter::ReportContext(const Context& context) {
       << FormatKV("mhz_per_cpu",
                   RoundDouble(info.cycles_per_second / 1000000.0))
       << ",\n";
-  out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled)
-      << ",\n";
+  if (CPUInfo::Scaling::UNKNOWN != info.scaling) {
+    out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
+        << ",\n";
+  }
 
   out << indent << "\"caches\": [\n";
   indent = std::string(6, ' ');
@@ -147,7 +149,7 @@ bool JSONReporter::ReportContext(const Context& context) {
     out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
         << ",\n";
     out << cache_indent
-        << FormatKV("size", static_cast<int64_t>(CI.size) * 1000u) << ",\n";
+        << FormatKV("size", static_cast<int64_t>(CI.size)) << ",\n";
     out << cache_indent
         << FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
         << "\n";
diff --git a/src/third_party/benchmark/dist/src/mutex.h b/src/third_party/benchmark/dist/src/mutex.h
index 5f461d05a0c..3fac79aea41 100644
--- a/src/third_party/benchmark/dist/src/mutex.h
+++ b/src/third_party/benchmark/dist/src/mutex.h
@@ -71,7 +71,7 @@ typedef std::condition_variable Condition;
 // NOTE: Wrappers for std::mutex and std::unique_lock are provided so that
 // we can annotate them with thread safety attributes and use the
 // -Wthread-safety warning with clang. The standard library types cannot be
-// used directly because they do not provided the required annotations.
+// used directly because they do not provide the required annotations.
 class CAPABILITY("mutex") Mutex {
  public:
   Mutex() {}
diff --git a/src/third_party/benchmark/dist/src/reporter.cc b/src/third_party/benchmark/dist/src/reporter.cc
index 4d3e477d44a..337575a1187 100644
--- a/src/third_party/benchmark/dist/src/reporter.cc
+++ b/src/third_party/benchmark/dist/src/reporter.cc
@@ -49,7 +49,7 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
     Out << "CPU Caches:\n";
     for (auto &CInfo : info.caches) {
       Out << "  L" << CInfo.level << " " << CInfo.type << " "
-          << (CInfo.size / 1000) << "K";
+          << (CInfo.size / 1024) << " KiB";
       if (CInfo.num_sharing != 0)
         Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")";
       Out << "\n";
@@ -64,7 +64,7 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
     Out << "\n";
   }
 
-  if (info.scaling_enabled) {
+  if (CPUInfo::Scaling::ENABLED == info.scaling) {
     Out << "***WARNING*** CPU scaling is enabled, the benchmark "
            "real time measurements may be noisy and will incur extra "
            "overhead.\n";
diff --git a/src/third_party/benchmark/dist/src/string_util.cc b/src/third_party/benchmark/dist/src/string_util.cc
index 39b01a1719a..ac60b5588f0 100644
--- a/src/third_party/benchmark/dist/src/string_util.cc
+++ b/src/third_party/benchmark/dist/src/string_util.cc
@@ -1,6 +1,9 @@
 #include "string_util.h"
 
 #include <array>
+#ifdef BENCHMARK_STL_ANDROID_GNUSTL
+#include <cerrno>
+#endif
 #include <cmath>
 #include <cstdarg>
 #include <cstdio>
diff --git a/src/third_party/benchmark/dist/src/sysinfo.cc b/src/third_party/benchmark/dist/src/sysinfo.cc
index 28126470bad..8bab9320f1d 100644
--- a/src/third_party/benchmark/dist/src/sysinfo.cc
+++ b/src/third_party/benchmark/dist/src/sysinfo.cc
@@ -57,6 +57,7 @@
 #include <memory>
 #include <sstream>
 #include <locale>
+#include <utility>
 
 #include "check.h"
 #include "cycleclock.h"
@@ -209,11 +210,11 @@ bool ReadFromFile(std::string const& fname, ArgT* arg) {
   return f.good();
 }
 
-bool CpuScalingEnabled(int num_cpus) {
+CPUInfo::Scaling CpuScaling(int num_cpus) {
   // We don't have a valid CPU count, so don't even bother.
-  if (num_cpus <= 0) return false;
+  if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN;
 #ifdef BENCHMARK_OS_QNX
-  return false;
+  return CPUInfo::Scaling::UNKNOWN;
 #endif
 #ifndef BENCHMARK_OS_WINDOWS
   // On Linux, the CPUfreq subsystem exposes CPU information as files on the
@@ -223,10 +224,11 @@ bool CpuScalingEnabled(int num_cpus) {
   for (int cpu = 0; cpu < num_cpus; ++cpu) {
     std::string governor_file =
         StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
-    if (ReadFromFile(governor_file, &res) && res != "performance") return true;
+    if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED;
   }
+  return CPUInfo::Scaling::DISABLED;
 #endif
-  return false;
+  return CPUInfo::Scaling::UNKNOWN;
 }
 
 int CountSetBitsInCPUMap(std::string Val) {
@@ -270,7 +272,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
       else if (f && suffix != "K")
         PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
       else if (suffix == "K")
-        info.size *= 1000;
+        info.size *= 1024;
     }
     if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
       PrintErrorAndDie("Failed to read from file ", FPath, "type");
@@ -382,9 +384,11 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesQNX() {
       case CACHE_FLAG_UNIFIED :
         info.type = "Unified";
         info.level = 2;
+        break;
       case CACHE_FLAG_SHARED :
         info.type = "Shared";
         info.level = 3;
+        break;
       default :
         continue;
         break;
@@ -429,11 +433,20 @@ std::string GetSystemName() {
 #endif
   return str;
 #else // defined(BENCHMARK_OS_WINDOWS)
+#ifndef HOST_NAME_MAX
 #ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined
 #define HOST_NAME_MAX 64
+#elif defined(BENCHMARK_OS_NACL)
+#define HOST_NAME_MAX 64
 #elif defined(BENCHMARK_OS_QNX)
 #define HOST_NAME_MAX 154
+#elif defined(BENCHMARK_OS_RTEMS)
+#define HOST_NAME_MAX 256
+#else
+#warning "HOST_NAME_MAX not defined. using 64"
+#define HOST_NAME_MAX 64
 #endif
+#endif // ndef HOST_NAME_MAX
   char hostname[HOST_NAME_MAX];
   int retVal = gethostname(hostname, HOST_NAME_MAX);
   if (retVal != 0) return std::string("");
@@ -686,7 +699,7 @@ CPUInfo::CPUInfo()
     : num_cpus(GetNumCPUs()),
       cycles_per_second(GetCPUCyclesPerSecond()),
       caches(GetCacheSizes()),
-      scaling_enabled(CpuScalingEnabled(num_cpus)),
+      scaling(CpuScaling(num_cpus)),
       load_avg(GetLoadAvg()) {}
 
 
diff --git a/src/third_party/benchmark/dist/src/thread_manager.h b/src/third_party/benchmark/dist/src/thread_manager.h
index 1720281f0a1..28e2dd53aff 100644
--- a/src/third_party/benchmark/dist/src/thread_manager.h
+++ b/src/third_party/benchmark/dist/src/thread_manager.h
@@ -11,7 +11,7 @@ namespace internal {
 
 class ThreadManager {
  public:
-  ThreadManager(int num_threads)
+  explicit ThreadManager(int num_threads)
       : alive_threads_(num_threads), start_stop_barrier_(num_threads) {}
 
   Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) {
diff --git a/src/third_party/benchmark/dist/src/thread_timer.h b/src/third_party/benchmark/dist/src/thread_timer.h
index fbd298d3bd4..1703ca0d6f8 100644
--- a/src/third_party/benchmark/dist/src/thread_timer.h
+++ b/src/third_party/benchmark/dist/src/thread_timer.h
@@ -43,19 +43,19 @@ class ThreadTimer {
   bool running() const { return running_; }
 
   // REQUIRES: timer is not running
-  double real_time_used() {
+  double real_time_used() const {
     CHECK(!running_);
     return real_time_used_;
   }
 
   // REQUIRES: timer is not running
-  double cpu_time_used() {
+  double cpu_time_used() const {
     CHECK(!running_);
     return cpu_time_used_;
   }
 
   // REQUIRES: timer is not running
-  double manual_time_used() {
+  double manual_time_used() const {
     CHECK(!running_);
     return manual_time_used_;
   }
diff --git a/src/third_party/benchmark/dist/src/timers.cc b/src/third_party/benchmark/dist/src/timers.cc
index 7613ff92c6e..4f76eddc1d3 100644
--- a/src/third_party/benchmark/dist/src/timers.cc
+++ b/src/third_party/benchmark/dist/src/timers.cc
@@ -178,40 +178,67 @@ double ThreadCPUUsage() {
 #endif
 }
 
-namespace {
-
-std::string DateTimeString(bool local) {
+std::string LocalDateTimeString() {
+  // Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM.
   typedef std::chrono::system_clock Clock;
   std::time_t now = Clock::to_time_t(Clock::now());
-  const std::size_t kStorageSize = 128;
-  char storage[kStorageSize];
-  std::size_t written;
+  const std::size_t kTzOffsetLen = 6;
+  const std::size_t kTimestampLen = 19;
+
+  std::size_t tz_len;
+  std::size_t timestamp_len;
+  long int offset_minutes;
+  char tz_offset_sign = '+';
+  // Long enough buffers to avoid format-overflow warnings
+  char tz_offset[128];
+  char storage[128];
 
-  if (local) {
 #if defined(BENCHMARK_OS_WINDOWS)
-    written =
-        std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now));
+  std::tm *timeinfo_p = ::localtime(&now);
 #else
-    std::tm timeinfo;
-    ::localtime_r(&now, &timeinfo);
-    written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
+  std::tm timeinfo;
+  std::tm *timeinfo_p = &timeinfo;
+  ::localtime_r(&now, &timeinfo);
 #endif
+
+  tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p);
+
+  if (tz_len < kTzOffsetLen && tz_len > 1) {
+    // Timezone offset was written. strftime writes offset as +HHMM or -HHMM,
+    // RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse
+    // the offset as an integer, then reprint it to a string.
+
+    offset_minutes = ::strtol(tz_offset, NULL, 10);
+    if (offset_minutes < 0) {
+      offset_minutes *= -1;
+      tz_offset_sign = '-';
+    }
+
+    tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li",
+        tz_offset_sign, offset_minutes / 100, offset_minutes % 100);
+    CHECK(tz_len == kTzOffsetLen);
+    ((void)tz_len); // Prevent unused variable warning in optimized build.
   } else {
+    // Unknown offset. RFC3339 specifies that unknown local offsets should be
+    // written as UTC time with -00:00 timezone.
 #if defined(BENCHMARK_OS_WINDOWS)
-    written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now));
+    // Potential race condition if another thread calls localtime or gmtime.
+    timeinfo_p = ::gmtime(&now);
 #else
-    std::tm timeinfo;
     ::gmtime_r(&now, &timeinfo);
-    written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo);
 #endif
+
+    strncpy(tz_offset, "-00:00", kTzOffsetLen + 1);
   }
-  CHECK(written < kStorageSize);
-  ((void)written);  // prevent unused variable in optimized mode.
-  return std::string(storage);
-}
 
-}  // end namespace
+  timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S",
+      timeinfo_p);
+  CHECK(timestamp_len == kTimestampLen);
+  // Prevent unused variable warning in optimized build.
+  ((void)kTimestampLen);
 
-std::string LocalDateTimeString() { return DateTimeString(true); }
+  std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1);
+  return std::string(storage);
+}
 
 }  // end namespace benchmark
diff --git a/src/third_party/benchmark/scripts/import.sh b/src/third_party/benchmark/scripts/import.sh
index 627c496cdbf..84957135857 100755
--- a/src/third_party/benchmark/scripts/import.sh
+++ b/src/third_party/benchmark/scripts/import.sh
@@ -7,7 +7,7 @@ IFS=$'\n\t'
 
 NAME="benchmark"
 
-LIB_GIT_REV="mongo/v1.5.0"
+LIB_GIT_REV="mongo/v1.5.2"
 
 LIB_GIT_URL="https://github.com/mongodb-forks/benchmark.git"
 LIB_GIT_DIR=$(mktemp -d /tmp/import-benchmark.XXXXXX)