diff options
23 files changed, 559 insertions, 258 deletions
diff --git a/README.third_party.md b/README.third_party.md index 86b004aa736..d731f74843e 100644 --- a/README.third_party.md +++ b/README.third_party.md @@ -24,7 +24,7 @@ a notice will be included in | [abseil-cpp] | Apache-2.0 | | 070f6e47b3 | | ✗ | | Aladdin MD5 | Zlib | | Unknown | ✗ | ✗ | | [ASIO] | BSL-1.0 | 1.16.1 | b0926b61b0 | | ✗ | -| [benchmark] | Apache-2.0 | 1.5.1 | 1.5.0 | | | +| [benchmark] | Apache-2.0 | 1.5.2 | 1.5.2 | | | | [Boost] | BSL-1.0 | 1.73.0 | 1.70.0 | | ✗ | | [fmt] | BSD-2-Clause | 7.1.3 | 7.1.3 | | ✗ | | [GPerfTools] | BSD-3-Clause | 2.8 | 2.8 | | ✗ | diff --git a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py index 6bc5ba22dbf..db637793d5c 100644 --- a/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py +++ b/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py @@ -131,10 +131,23 @@ class _BenchmarkThreadsReport(object): """ CONTEXT_FIELDS = [ - "date", "cpu_scaling_enabled", "num_cpus", "mhz_per_cpu", "library_build_type", - "executable", "caches" + "date", + "num_cpus", + "mhz_per_cpu", + "library_build_type", + "executable", + "caches", + "cpu_scaling_enabled", ] - Context = collections.namedtuple("Context", CONTEXT_FIELDS) # type: ignore + + Context = collections.namedtuple( + typename="Context", + field_names=CONTEXT_FIELDS, + # We need a default for cpu_scaling_enabled, since newer + # google benchmark doesn't report a value if it can't make a + # determination. + defaults=["unknown"], + ) # type: ignore def __init__(self, context_dict): # `context_dict` was parsed from a json file and might have additional fields. 
diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 75e63cf34b1..1d142c56904 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -11564,16 +11564,12 @@ buildvariants: modules: - enterprise run_on: - - centos6-perf + - rhel80-medium expansions: compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars tasks: - name: compile_benchmarks - distros: - - rhel80-medium - name: .benchmarks - distros: - - centos6-perf - name: enterprise-rhel-80-64-bit-inmem display_name: Enterprise RHEL 8.0 (inMemory) diff --git a/src/third_party/benchmark/dist/README.md b/src/third_party/benchmark/dist/README.md index 45e41588438..41a1bdff757 100644 --- a/src/third_party/benchmark/dist/README.md +++ b/src/third_party/benchmark/dist/README.md @@ -1,10 +1,10 @@ # Benchmark + [![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) [![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master) [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) [![slackin](https://slackin-iqtfqnpzxd.now.sh/badge.svg)](https://slackin-iqtfqnpzxd.now.sh/) - A library to benchmark code snippets, similar to unit tests. Example: ```c++ @@ -49,9 +49,11 @@ The following minimum versions are required to build the library: * GCC 4.8 * Clang 3.4 -* Visual Studio 2013 +* Visual Studio 14 2015 * Intel 2015 Update 1 +See [Platform-Specific Build Instructions](#platform-specific-build-instructions). + ## Installation This describes the installation process using cmake. As pre-requisites, you'll @@ -65,37 +67,40 @@ versions of build tools._ $ git clone https://github.com/google/benchmark.git # Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory. 
$ git clone https://github.com/google/googletest.git benchmark/googletest +# Go to the library root directory +$ cd benchmark # Make a build directory to place the build output. -$ mkdir build && cd build -# Generate a Makefile with cmake. -# Use cmake -G <generator> to generate a different file type. -$ cmake ../benchmark +$ cmake -E make_directory "build" +# Generate build system files with cmake. +$ cmake -E chdir "build" cmake -DCMAKE_BUILD_TYPE=Release ../ +# or, starting with CMake 3.13, use a simpler form: +# cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build" # Build the library. -$ make +$ cmake --build "build" --config Release ``` This builds the `benchmark` and `benchmark_main` libraries and tests. On a unix system, the build directory should now look something like this: ``` /benchmark -/build - /src - /libbenchmark.a - /libbenchmark_main.a - /test - ... + /build + /src + /libbenchmark.a + /libbenchmark_main.a + /test + ... ``` Next, you can run the tests to check the build. ```bash -$ make test +$ cmake -E chdir "build" ctest --build-config Release ``` If you want to install the library globally, also run: ``` -sudo make install +sudo cmake --build "build" --config Release --target install ``` Note that Google Benchmark requires Google Test to build and run the tests. This @@ -112,17 +117,14 @@ to `CMAKE_ARGS`. ### Debug vs Release By default, benchmark builds as a debug library. You will see a warning in the -output when this is the case. To build it as a release library instead, use: +output when this is the case. To build it as a release library instead, add +`-DCMAKE_BUILD_TYPE=Release` when generating the build system files, as shown +above. The use of `--config Release` in build commands is needed to properly +support multi-configuration tools (like Visual Studio for example) and can be +skipped for other build systems (like Makefile). 
-``` -cmake -DCMAKE_BUILD_TYPE=Release -``` - -To enable link-time optimisation, use - -``` -cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true -``` +To enable link-time optimisation, also add `-DBENCHMARK_ENABLE_LTO=true` when +generating the build system files. If you are using gcc, you might need to set `GCC_AR` and `GCC_RANLIB` cmake cache variables, if autodetection fails. @@ -130,7 +132,6 @@ cache variables, if autodetection fails. If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, `LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables. - ### Stable and Experimental Library Versions The main branch contains the latest stable version of the benchmarking library; @@ -144,7 +145,9 @@ this branch. However, this branch provides no stability guarantees and reserves the right to change and break the API at any time. ## Usage + ### Basic usage + Define a function that executes the code to measure, register it as a benchmark function using the `BENCHMARK` macro, and ensure an appropriate `main` function is available: @@ -171,14 +174,14 @@ BENCHMARK_MAIN(); ``` To run the benchmark, compile and link against the `benchmark` library -(libbenchmark.a/.so). If you followed the build steps above, this -library will be under the build directory you created. +(libbenchmark.a/.so). If you followed the build steps above, this library will +be under the build directory you created. ```bash # Example on linux after running the build steps above. Assumes the # `benchmark` and `build` directories are under the current directory. 
-$ g++ -std=c++11 -isystem benchmark/include -Lbuild/src -lpthread \ - -lbenchmark mybenchmark.cc -o mybenchmark +$ g++ mybenchmark.cc -std=c++11 -isystem benchmark/include \ + -Lbenchmark/build/src -lbenchmark -lpthread -o mybenchmark ``` Alternatively, link against the `benchmark_main` library and remove @@ -187,7 +190,29 @@ Alternatively, link against the `benchmark_main` library and remove The compiled executable will run all benchmarks by default. Pass the `--help` flag for option information or see the guide below. -### Platform-specific instructions +### Usage with CMake + +If using CMake, it is recommended to link against the project-provided +`benchmark::benchmark` and `benchmark::benchmark_main` targets using +`target_link_libraries`. +It is possible to use ```find_package``` to import an installed version of the +library. +```cmake +find_package(benchmark REQUIRED) +``` +Alternatively, ```add_subdirectory``` will incorporate the library directly in +to one's CMake project. +```cmake +add_subdirectory(benchmark) +``` +Either way, link to the library as follows. +```cmake +target_link_libraries(MyTarget benchmark::benchmark) +``` + +## Platform Specific Build Instructions + +### Building with GCC When the library is built using GCC it is necessary to link with the pthread library due to how GCC implements `std::thread`. Failing to link to pthread will @@ -197,8 +222,34 @@ can link to pthread by adding `-pthread` to your linker command. Note, you can also use `-lpthread`, but there are potential issues with ordering of command line parameters if you use that. -If you're running benchmarks on Windows, the shlwapi library (`-lshlwapi`) is -also required. +### Building with Visual Studio 2015 or 2017 + +The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. 
Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following: + +``` +// Alternatively, can add libraries using linker options. +#ifdef _WIN32 +#pragma comment ( lib, "Shlwapi.lib" ) +#ifdef _DEBUG +#pragma comment ( lib, "benchmarkd.lib" ) +#else +#pragma comment ( lib, "benchmark.lib" ) +#endif +#endif +``` + +Can also use the graphical version of CMake: +* Open `CMake GUI`. +* Under `Where to build the binaries`, same path as source plus `build`. +* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`. +* Click `Configure`, `Generate`, `Open Project`. +* If build fails, try deleting entire directory and starting again, or unticking options to build less. + +### Building with Intel 2015 Update 1 or Intel System Studio Update 4 + +See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel. + +### Building on Solaris If you're running benchmarks on solaris, you'll want the kstat library linked in too (`-lkstat`). @@ -206,15 +257,19 @@ too (`-lkstat`). ## User Guide ### Command Line + [Output Formats](#output-formats) [Output Files](#output-files) +[Running Benchmarks](#running-benchmarks) + [Running a Subset of Benchmarks](#running-a-subset-of-benchmarks) [Result Comparison](#result-comparison) ### Library + [Runtime and Reporting Considerations](#runtime-and-reporting-considerations) [Passing Arguments](#passing-arguments) @@ -249,17 +304,20 @@ too (`-lkstat`). [Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) + <a name="output-formats" /> ### Output Formats The library supports multiple output formats. Use the -`--benchmark_format=<console|json|csv>` flag to set the format type. `console` -is the default format. +`--benchmark_format=<console|json|csv>` flag (or set the +`BENCHMARK_FORMAT=<console|json|csv>` environment variable) to set +the format type. `console` is the default format. 
The Console format is intended to be a human readable format. By default the format generates color output. Context is output on stderr and the tabular data on stdout. Example tabular output looks like: + ``` Benchmark Time(ns) CPU(ns) Iterations ---------------------------------------------------------------------- @@ -273,6 +331,7 @@ The `context` attribute contains information about the run in general, including information about the CPU and the date. The `benchmarks` attribute contains a list of every benchmark run. Example json output looks like: + ```json { "context": { @@ -313,6 +372,7 @@ output looks like: The CSV format outputs comma-separated values. The `context` is output on stderr and the CSV itself on stdout. Example CSV output looks like: + ``` name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label "BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, @@ -324,16 +384,31 @@ name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label ### Output Files -Write benchmark results to a file with the `--benchmark_out=<filename>` option. -Specify the output format with `--benchmark_out_format={json|console|csv}`. Note that Specifying +Write benchmark results to a file with the `--benchmark_out=<filename>` option +(or set `BENCHMARK_OUT`). Specify the output format with +`--benchmark_out_format={json|console|csv}` (or set +`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that specifying `--benchmark_out` does not suppress the console output. +<a name="running-benchmarks" /> + +### Running Benchmarks + +Benchmarks are executed by running the produced binaries. Benchmark binaries, +by default, accept options that may be specified either through their command +line interface or by setting environment variables before execution. For every +`--option_flag=<value>` CLI switch, a corresponding environment variable +`OPTION_FLAG=<value>` exists and is used as the default if set (CLI switches always + prevail). 
A complete list of CLI options is available by running benchmarks + with the `--help` switch. + <a name="running-a-subset-of-benchmarks" /> ### Running a Subset of Benchmarks -The `--benchmark_filter=<regex>` option can be used to only run the benchmarks -which match the specified `<regex>`. For example: +The `--benchmark_filter=<regex>` option (or `BENCHMARK_FILTER=<regex>` +environment variable) can be used to only run the benchmarks that match +the specified `<regex>`. For example: ```bash $ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32 @@ -351,7 +426,8 @@ BM_memcpy/32k 1834 ns 1837 ns 357143 ### Result comparison -It is possible to compare the benchmarking results. See [Additional Tooling Documentation](docs/tools.md) +It is possible to compare the benchmarking results. +See [Additional Tooling Documentation](docs/tools.md) <a name="runtime-and-reporting-considerations" /> @@ -417,8 +493,26 @@ range multiplier is changed to multiples of two. ```c++ BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); ``` + Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. +The preceding code shows a method of defining a sparse range. The following +example shows a method of defining a dense range. It is then used to benchmark +the performance of `std::vector` initialization for uniformly increasing sizes. + +```c++ +static void BM_DenseRange(benchmark::State& state) { + for(auto _ : state) { + std::vector<int> v(state.range(0), state.range(0)); + benchmark::DoNotOptimize(v.data()); + benchmark::ClobberMemory(); + } +} +BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128); +``` + +Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ]. + You might have a benchmark that depends on two or more inputs. For example, the following code defines a family of benchmarks for measuring the speed of set insertion. @@ -454,6 +548,29 @@ pair. 
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` +Some benchmarks may require specific argument values that cannot be expressed +with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a +benchmark input for each combination in the product of the supplied vectors. + +```c++ +BENCHMARK(BM_SetInsert) + ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}); +// would generate the same benchmark arguments as +BENCHMARK(BM_SetInsert) + ->Args({1<<10, 20}) + ->Args({3<<10, 20}) + ->Args({8<<10, 20}) + ->Args({1<<10, 40}) + ->Args({3<<10, 40}) + ->Args({8<<10, 40}) + ->Args({1<<10, 60}) + ->Args({3<<10, 60}) + ->Args({8<<10, 60}) + ->Args({1<<10, 80}) + ->Args({3<<10, 80}) + ->Args({8<<10, 80}); +``` + For more complex patterns of inputs, passing a custom function to `Apply` allows programmatic specification of an arbitrary set of arguments on which to run the benchmark. The following example enumerates a dense range on one parameter, @@ -486,6 +603,7 @@ void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { // the specified values to `extra_args`. BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); ``` + Note that elements of `...args` may refer to global variables. Users should avoid modifying global state inside of a benchmark. @@ -523,7 +641,7 @@ that might be used to customize high-order term calculation. ```c++ BENCHMARK(BM_StringCompare)->RangeMultiplier(2) - ->Range(1<<10, 1<<18)->Complexity([](int64_t n)->double{return n; }); + ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; }); ``` <a name="templated-benchmarks" /> @@ -610,6 +728,7 @@ Also you can create templated fixture by using the following macros: * `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)` For example: + ```c++ template<typename T> class MyFixture : public benchmark::Fixture {}; @@ -659,9 +778,9 @@ the resulting sum is the value which will be shown for the benchmark. 
The `Counter` constructor accepts three parameters: the value as a `double` ; a bit flag which allows you to show counters as rates, and/or as per-thread -iteration, and/or as per-thread averages, and/or iteration invariants; -and a flag specifying the 'unit' - i.e. is 1k a 1000 (default, -`benchmark::Counter::OneK::kIs1000`), or 1024 +iteration, and/or as per-thread averages, and/or iteration invariants, +and/or finally inverting the result; and a flag specifying the 'unit' - i.e. +is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024 (`benchmark::Counter::OneK::kIs1024`)? ```c++ @@ -670,8 +789,14 @@ and a flag specifying the 'unit' - i.e. is 1k a 1000 (default, // Set the counter as a rate. It will be presented divided // by the duration of the benchmark. + // Meaning: per one second, how many 'foo's are processed? state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate); + // Set the counter as a rate. It will be presented divided + // by the duration of the benchmark, and the result inverted. + // Meaning: how many seconds it takes to process one 'foo'? + state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert); + // Set the counter as a thread-average quantity. It will // be presented divided by the number of threads. state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads); @@ -697,7 +822,7 @@ When you're compiling in C++11 mode or later you can use `insert()` with #### Counter Reporting -When using the console reporter, by default, user counters are are printed at +When using the console reporter, by default, user counters are printed at the end after the table, the same way as ``bytes_processed`` and ``items_processed``. This is best for cases in which there are few counters, or where there are only a couple of lines per benchmark. 
Here's an example of @@ -758,6 +883,7 @@ BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416 BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159 BM_CalculatePi/threads:8 2255 ns 9943 ns 70936 ``` + Note above the additional header printed when the benchmark changes from ``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does not have the same counter set as ``BM_UserCounter``. @@ -818,7 +944,7 @@ static void MyMain(int size) { static void BM_OpenMP(benchmark::State& state) { for (auto _ : state) - MyMain(state.range(0); + MyMain(state.range(0)); } // Measure the time spent by the main thread, use it to decide for how long to @@ -849,7 +975,7 @@ BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime(); Normally, the entire duration of the work loop (`for (auto _ : state) {}`) is measured. But sometimes, it is necessary to do some work inside of that loop, every iteration, but without counting that time to the benchmark time. -That is possible, althought it is not recommended, since it has high overhead. +That is possible, although it is not recommended, since it has high overhead. ```c++ static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { @@ -895,7 +1021,7 @@ static void BM_ManualTiming(benchmark::State& state) { auto start = std::chrono::high_resolution_clock::now(); // Simulate some useful workload with a sleep std::this_thread::sleep_for(sleep_duration); - auto end = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); auto elapsed_seconds = std::chrono::duration_cast<std::chrono::duration<double>>( @@ -1077,7 +1203,9 @@ Users must explicitly exit the loop, otherwise all iterations will be performed. Users may explicitly return to exit the benchmark immediately. The `SkipWithError(...)` function may be used at any point within the benchmark, -including before and after the benchmark loop. +including before and after the benchmark loop. 
Moreover, if `SkipWithError(...)` +has been used, it is not required to reach the benchmark loop and one may return +from the benchmark function early. For example: @@ -1085,24 +1213,32 @@ For example: static void BM_test(benchmark::State& state) { auto resource = GetResource(); if (!resource.good()) { - state.SkipWithError("Resource is not good!"); - // KeepRunning() loop will not be entered. + state.SkipWithError("Resource is not good!"); + // KeepRunning() loop will not be entered. } - for (state.KeepRunning()) { - auto data = resource.read_data(); - if (!resource.good()) { - state.SkipWithError("Failed to read data!"); - break; // Needed to skip the rest of the iteration. - } - do_stuff(data); + while (state.KeepRunning()) { + auto data = resource.read_data(); + if (!resource.good()) { + state.SkipWithError("Failed to read data!"); + break; // Needed to skip the rest of the iteration. + } + do_stuff(data); } } static void BM_test_ranged_fo(benchmark::State & state) { - state.SkipWithError("test will not be entered"); + auto resource = GetResource(); + if (!resource.good()) { + state.SkipWithError("Resource is not good!"); + return; // Early return is allowed when SkipWithError() has been used. + } for (auto _ : state) { - state.SkipWithError("Failed!"); - break; // REQUIRED to prevent all further iterations. + auto data = resource.read_data(); + if (!resource.good()) { + state.SkipWithError("Failed to read data!"); + break; // REQUIRED to prevent all further iterations. + } + do_stuff(data); } } ``` @@ -1167,11 +1303,15 @@ the benchmark loop should be preferred. <a name="disabling-cpu-frequency-scaling" /> ### Disabling CPU Frequency Scaling + If you see this error: + ``` ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 
``` + you might want to disable the CPU frequency scaling while running the benchmark: + ```bash sudo cpupower frequency-set --governor performance ./mybench diff --git a/src/third_party/benchmark/dist/include/benchmark/benchmark.h b/src/third_party/benchmark/dist/include/benchmark/benchmark.h index 4f40501596e..b906352849c 100644 --- a/src/third_party/benchmark/dist/include/benchmark/benchmark.h +++ b/src/third_party/benchmark/dist/include/benchmark/benchmark.h @@ -176,6 +176,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); #include <map> #include <set> #include <string> +#include <utility> #include <vector> #if defined(BENCHMARK_HAS_CXX11) @@ -368,7 +369,10 @@ class Counter { // It will be presented divided by the number of iterations. kAvgIterations = 1U << 3U, // Mark the counter as a iteration-average rate. See above. - kAvgIterationsRate = kIsRate | kAvgIterations + kAvgIterationsRate = kIsRate | kAvgIterations, + + // In the end, invert the result. This is always done last! + kInvert = 1U << 31U }; enum OneK { @@ -538,6 +542,9 @@ class State { // responsibility to exit the scope as needed. void SkipWithError(const char* msg); + // Returns true if an error has been reported with 'SkipWithError(...)'. + bool error_occurred() const { return error_occurred_; } + // REQUIRES: called exactly once per iteration of the benchmarking loop. 
// Set the manually measured time for this benchmark iteration, which // is used instead of automatically measured time if UseManualTime() was @@ -574,7 +581,7 @@ class State { void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; } BENCHMARK_ALWAYS_INLINE - int64_t complexity_length_n() { return complexity_n_; } + int64_t complexity_length_n() const { return complexity_n_; } // If this routine is called with items > 0, then an items/s // label is printed on the benchmark report line for the currently @@ -821,6 +828,11 @@ class Benchmark { // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges); + // Run this benchmark once for each combination of values in the (cartesian) + // product of the supplied argument lists. + // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... + Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists); + // Equivalent to ArgNames({name}) Benchmark* ArgName(const std::string& name); @@ -1291,10 +1303,16 @@ struct CPUInfo { int num_sharing; }; + enum Scaling { + UNKNOWN, + ENABLED, + DISABLED + }; + int num_cpus; double cycles_per_second; std::vector<CacheInfo> caches; - bool scaling_enabled; + Scaling scaling; std::vector<double> load_avg; static const CPUInfo& Get(); diff --git a/src/third_party/benchmark/dist/src/benchmark.cc b/src/third_party/benchmark/dist/src/benchmark.cc index 29bfa3512f9..1c049f28844 100644 --- a/src/third_party/benchmark/dist/src/benchmark.cc +++ b/src/third_party/benchmark/dist/src/benchmark.cc @@ -51,66 +51,60 @@ #include "thread_manager.h" #include "thread_timer.h" -DEFINE_bool(benchmark_list_tests, false, - "Print a list of benchmarks. This option overrides all other " - "options."); - -DEFINE_string(benchmark_filter, ".", - "A regular expression that specifies the set of benchmarks " - "to execute. 
If this flag is empty, or if this flag is the " - "string \"all\", all benchmarks linked into the binary are " - "run."); - -DEFINE_double(benchmark_min_time, 0.5, - "Minimum number of seconds we should run benchmark before " - "results are considered significant. For cpu-time based " - "tests, this is the lower bound on the total cpu time " - "used by all threads that make up the test. For real-time " - "based tests, this is the lower bound on the elapsed time " - "of the benchmark execution, regardless of number of " - "threads."); - -DEFINE_int32(benchmark_repetitions, 1, - "The number of runs of each benchmark. If greater than 1, the " - "mean and standard deviation of the runs will be reported."); - -DEFINE_bool( - benchmark_report_aggregates_only, false, - "Report the result of each benchmark repetitions. When 'true' is specified " - "only the mean, standard deviation, and other statistics are reported for " - "repeated benchmarks. Affects all reporters."); - -DEFINE_bool( - benchmark_display_aggregates_only, false, - "Display the result of each benchmark repetitions. When 'true' is " - "specified only the mean, standard deviation, and other statistics are " - "displayed for repeated benchmarks. Unlike " - "benchmark_report_aggregates_only, only affects the display reporter, but " - "*NOT* file reporter, which will still contain all the output."); - -DEFINE_string(benchmark_format, "console", - "The format to use for console output. Valid values are " - "'console', 'json', or 'csv'."); - -DEFINE_string(benchmark_out_format, "json", - "The format to use for file output. Valid values are " - "'console', 'json', or 'csv'."); - -DEFINE_string(benchmark_out, "", "The file to write additional output to"); - -DEFINE_string(benchmark_color, "auto", - "Whether to use colors in the output. Valid values: " - "'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 
'auto' means to use " - "colors if the output is being sent to a terminal and the TERM " - "environment variable is set to a terminal type that supports " - "colors."); - -DEFINE_bool(benchmark_counters_tabular, false, - "Whether to use tabular format when printing user counters to " - "the console. Valid values: 'true'/'yes'/1, 'false'/'no'/0." - "Defaults to false."); - -DEFINE_int32(v, 0, "The level of verbose logging to output"); +// Print a list of benchmarks. This option overrides all other options. +DEFINE_bool(benchmark_list_tests, false); + +// A regular expression that specifies the set of benchmarks to execute. If +// this flag is empty, or if this flag is the string \"all\", all benchmarks +// linked into the binary are run. +DEFINE_string(benchmark_filter, "."); + +// Minimum number of seconds we should run benchmark before results are +// considered significant. For cpu-time based tests, this is the lower bound +// on the total cpu time used by all threads that make up the test. For +// real-time based tests, this is the lower bound on the elapsed time of the +// benchmark execution, regardless of number of threads. +DEFINE_double(benchmark_min_time, 0.5); + +// The number of runs of each benchmark. If greater than 1, the mean and +// standard deviation of the runs will be reported. +DEFINE_int32(benchmark_repetitions, 1); + +// Report the result of each benchmark repetitions. When 'true' is specified +// only the mean, standard deviation, and other statistics are reported for +// repeated benchmarks. Affects all reporters. +DEFINE_bool(benchmark_report_aggregates_only, false); + +// Display the result of each benchmark repetitions. When 'true' is specified +// only the mean, standard deviation, and other statistics are displayed for +// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects +// the display reporter, but *NOT* file reporter, which will still contain +// all the output. 
+DEFINE_bool(benchmark_display_aggregates_only, false); + +// The format to use for console output. +// Valid values are 'console', 'json', or 'csv'. +DEFINE_string(benchmark_format, "console"); + +// The format to use for file output. +// Valid values are 'console', 'json', or 'csv'. +DEFINE_string(benchmark_out_format, "json"); + +// The file to write additional output to. +DEFINE_string(benchmark_out, ""); + +// Whether to use colors in the output. Valid values: +// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if +// the output is being sent to a terminal and the TERM environment variable is +// set to a terminal type that supports colors. +DEFINE_string(benchmark_color, "auto"); + +// Whether to use tabular format when printing user counters to the console. +// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. +DEFINE_bool(benchmark_counters_tabular, false); + +// The level of verbose logging to output +DEFINE_int32(v, 0); namespace benchmark { @@ -148,7 +142,7 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges, // which must be suppressed. 
#if defined(__INTEL_COMPILER) #pragma warning push -#pragma warning(disable:1875) +#pragma warning(disable : 1875) #elif defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winvalid-offsetof" @@ -289,6 +283,13 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, flushStreams(file_reporter); } +// Disable deprecated warnings temporarily because we need to reference +// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + std::unique_ptr<BenchmarkReporter> CreateReporter( std::string const& name, ConsoleReporter::OutputOptions output_opts) { typedef std::unique_ptr<BenchmarkReporter> PtrType; @@ -304,6 +305,10 @@ std::unique_ptr<BenchmarkReporter> CreateReporter( } } +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + } // end namespace bool IsZero(double n) { @@ -312,7 +317,7 @@ bool IsZero(double n) { ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { int output_opts = ConsoleReporter::OO_Defaults; - auto is_benchmark_color = [force_no_color] () -> bool { + auto is_benchmark_color = [force_no_color]() -> bool { if (force_no_color) { return false; } @@ -430,7 +435,7 @@ void ParseCommandLineFlags(int* argc, char** argv) { using namespace benchmark; BenchmarkReporter::Context::executable_name = (argc && *argc > 0) ? 
argv[0] : "unknown"; - for (int i = 1; i < *argc; ++i) { + for (int i = 1; argc && i < *argc; ++i) { if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || diff --git a/src/third_party/benchmark/dist/src/benchmark_register.cc b/src/third_party/benchmark/dist/src/benchmark_register.cc index 6696c382b80..65d9944f4f9 100644 --- a/src/third_party/benchmark/dist/src/benchmark_register.cc +++ b/src/third_party/benchmark/dist/src/benchmark_register.cc @@ -31,10 +31,13 @@ #include <fstream> #include <iostream> #include <memory> +#include <numeric> #include <sstream> #include <thread> +#ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS +#endif #include <inttypes.h> #include "benchmark/benchmark.h" @@ -301,33 +304,41 @@ Benchmark* Benchmark::Ranges( const std::vector<std::pair<int64_t, int64_t>>& ranges) { CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size())); std::vector<std::vector<int64_t>> arglists(ranges.size()); - std::size_t total = 1; for (std::size_t i = 0; i < ranges.size(); i++) { AddRange(&arglists[i], ranges[i].first, ranges[i].second, range_multiplier_); - total *= arglists[i].size(); } - std::vector<std::size_t> ctr(arglists.size(), 0); - - for (std::size_t i = 0; i < total; i++) { - std::vector<int64_t> tmp; - tmp.reserve(arglists.size()); + ArgsProduct(arglists); - for (std::size_t j = 0; j < arglists.size(); j++) { - tmp.push_back(arglists[j].at(ctr[j])); - } + return this; +} - args_.push_back(std::move(tmp)); +Benchmark* Benchmark::ArgsProduct( + const std::vector<std::vector<int64_t>>& arglists) { + CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size())); - for (std::size_t j = 0; j < arglists.size(); j++) { - if (ctr[j] + 1 < arglists[j].size()) { - ++ctr[j]; - break; - } - ctr[j] = 0; + std::vector<std::size_t> indices(arglists.size()); + const std::size_t total = std::accumulate( + std::begin(arglists), 
std::end(arglists), std::size_t{1}, + [](const std::size_t res, const std::vector<int64_t>& arglist) { + return res * arglist.size(); + }); + std::vector<int64_t> args; + args.reserve(arglists.size()); + for (std::size_t i = 0; i < total; i++) { + for (std::size_t arg = 0; arg < arglists.size(); arg++) { + args.push_back(arglists[arg][indices[arg]]); } + args_.push_back(args); + args.clear(); + + std::size_t arg = 0; + do { + indices[arg] = (indices[arg] + 1) % arglists[arg].size(); + } while (indices[arg++] == 0 && arg < arglists.size()); } + return this; } diff --git a/src/third_party/benchmark/dist/src/benchmark_runner.cc b/src/third_party/benchmark/dist/src/benchmark_runner.cc index 0bae6a545ef..7bc6b6329ef 100644 --- a/src/third_party/benchmark/dist/src/benchmark_runner.cc +++ b/src/third_party/benchmark/dist/src/benchmark_runner.cc @@ -117,7 +117,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); State st = b->Run(iters, thread_id, &timer, manager); - CHECK(st.iterations() >= st.max_iterations) + CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(manager->GetBenchmarkMutex()); @@ -263,8 +263,9 @@ class BenchmarkRunner { if (multiplier <= 1.0) multiplier = 2.0; // So what seems to be the sufficiently-large iteration count? Round up. - const IterationCount max_next_iters = - 0.5 + std::max(multiplier * i.iters, i.iters + 1.0); + const IterationCount max_next_iters = static_cast<IterationCount>( + std::lround(std::max(multiplier * static_cast<double>(i.iters), + static_cast<double>(i.iters) + 1.0))); // But we do have *some* sanity limits though.. 
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); diff --git a/src/third_party/benchmark/dist/src/commandlineflags.cc b/src/third_party/benchmark/dist/src/commandlineflags.cc index 6bd65c5ae70..0648fe3a06e 100644 --- a/src/third_party/benchmark/dist/src/commandlineflags.cc +++ b/src/third_party/benchmark/dist/src/commandlineflags.cc @@ -14,6 +14,7 @@ #include "commandlineflags.h" +#include <algorithm> #include <cctype> #include <cstdlib> #include <cstring> @@ -87,49 +88,45 @@ static std::string FlagToEnvVar(const char* flag) { for (size_t i = 0; i != flag_str.length(); ++i) env_var += static_cast<char>(::toupper(flag_str.c_str()[i])); - return "BENCHMARK_" + env_var; + return env_var; } } // namespace -// Reads and returns the Boolean environment variable corresponding to -// the given flag; if it's not set, returns default_value. -// -// The value is considered true iff it's not "0". -bool BoolFromEnv(const char* flag, bool default_value) { +bool BoolFromEnv(const char* flag, bool default_val) { const std::string env_var = FlagToEnvVar(flag); - const char* const string_value = getenv(env_var.c_str()); - return string_value == nullptr ? default_value - : strcmp(string_value, "0") != 0; + const char* const value_str = getenv(env_var.c_str()); + return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str); } -// Reads and returns a 32-bit integer stored in the environment -// variable corresponding to the given flag; if it isn't set or -// doesn't represent a valid 32-bit integer, returns default_value. -int32_t Int32FromEnv(const char* flag, int32_t default_value) { +int32_t Int32FromEnv(const char* flag, int32_t default_val) { const std::string env_var = FlagToEnvVar(flag); - const char* const string_value = getenv(env_var.c_str()); - if (string_value == nullptr) { - // The environment variable is not set. 
- return default_value; + const char* const value_str = getenv(env_var.c_str()); + int32_t value = default_val; + if (value_str == nullptr || + !ParseInt32(std::string("Environment variable ") + env_var, value_str, + &value)) { + return default_val; } + return value; +} - int32_t result = default_value; - if (!ParseInt32(std::string("Environment variable ") + env_var, string_value, - &result)) { - std::cout << "The default value " << default_value << " is used.\n"; - return default_value; +double DoubleFromEnv(const char* flag, double default_val) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value_str = getenv(env_var.c_str()); + double value = default_val; + if (value_str == nullptr || + !ParseDouble(std::string("Environment variable ") + env_var, value_str, + &value)) { + return default_val; } - - return result; + return value; } -// Reads and returns the string environment variable corresponding to -// the given flag; if it's not set, returns default_value. -const char* StringFromEnv(const char* flag, const char* default_value) { +const char* StringFromEnv(const char* flag, const char* default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value = getenv(env_var.c_str()); - return value == nullptr ? default_value : value; + return value == nullptr ? default_val : value; } // Parses a string as a command line flag. 
The string should have @@ -214,9 +211,18 @@ bool IsFlag(const char* str, const char* flag) { } bool IsTruthyFlagValue(const std::string& value) { - if (value.empty()) return true; - char ch = value[0]; - return isalnum(ch) && - !(ch == '0' || ch == 'f' || ch == 'F' || ch == 'n' || ch == 'N'); + if (value.size() == 1) { + char v = value[0]; + return isalnum(v) && + !(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N'); + } else if (!value.empty()) { + std::string value_lower(value); + std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(), + [](char c) { return static_cast<char>(::tolower(c)); }); + return !(value_lower == "false" || value_lower == "no" || + value_lower == "off"); + } else + return true; } + } // end namespace benchmark diff --git a/src/third_party/benchmark/dist/src/commandlineflags.h b/src/third_party/benchmark/dist/src/commandlineflags.h index 5eaea82a59b..3a1f6a8dbc9 100644 --- a/src/third_party/benchmark/dist/src/commandlineflags.h +++ b/src/third_party/benchmark/dist/src/commandlineflags.h @@ -10,23 +10,51 @@ // Macros for declaring flags. #define DECLARE_bool(name) extern bool FLAG(name) #define DECLARE_int32(name) extern int32_t FLAG(name) -#define DECLARE_int64(name) extern int64_t FLAG(name) #define DECLARE_double(name) extern double FLAG(name) #define DECLARE_string(name) extern std::string FLAG(name) // Macros for defining flags. 
-#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val) -#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val) -#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val) -#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val) -#define DEFINE_string(name, default_val, doc) \ - std::string FLAG(name) = (default_val) +#define DEFINE_bool(name, default_val) \ + bool FLAG(name) = \ + benchmark::BoolFromEnv(#name, default_val) +#define DEFINE_int32(name, default_val) \ + int32_t FLAG(name) = \ + benchmark::Int32FromEnv(#name, default_val) +#define DEFINE_double(name, default_val) \ + double FLAG(name) = \ + benchmark::DoubleFromEnv(#name, default_val) +#define DEFINE_string(name, default_val) \ + std::string FLAG(name) = \ + benchmark::StringFromEnv(#name, default_val) namespace benchmark { -// Parses a bool/Int32/string from the environment variable -// corresponding to the given Google Test flag. + +// Parses a bool from the environment variable +// corresponding to the given flag. +// +// If the variable exists, returns IsTruthyFlagValue() value; if not, +// returns the given default value. bool BoolFromEnv(const char* flag, bool default_val); + +// Parses an Int32 from the environment variable +// corresponding to the given flag. +// +// If the variable exists, returns ParseInt32() value; if not, returns +// the given default value. int32_t Int32FromEnv(const char* flag, int32_t default_val); + +// Parses an Double from the environment variable +// corresponding to the given flag. +// +// If the variable exists, returns ParseDouble(); if not, returns +// the given default value. +double DoubleFromEnv(const char* flag, double default_val); + +// Parses a string from the environment variable +// corresponding to the given flag. +// +// If variable exists, returns its value; if not, returns +// the given default value. 
const char* StringFromEnv(const char* flag, const char* default_val); // Parses a string for a bool flag, in the form of either @@ -65,9 +93,11 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value); bool IsFlag(const char* str, const char* flag); // Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or -// some non-alphanumeric character. As a special case, also returns true if -// value is the empty string. +// some non-alphanumeric character. Also returns false if the value matches +// one of 'no', 'false', 'off' (case-insensitive). As a special case, also +// returns true if value is the empty string. bool IsTruthyFlagValue(const std::string& value); + } // end namespace benchmark #endif // BENCHMARK_COMMANDLINEFLAGS_H_ diff --git a/src/third_party/benchmark/dist/src/console_reporter.cc b/src/third_party/benchmark/dist/src/console_reporter.cc index cc8ae276f6b..6fd764525e8 100644 --- a/src/third_party/benchmark/dist/src/console_reporter.cc +++ b/src/third_party/benchmark/dist/src/console_reporter.cc @@ -12,21 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "benchmark/benchmark.h" -#include "complexity.h" -#include "counter.h" - #include <algorithm> #include <cstdint> #include <cstdio> +#include <cstring> #include <iostream> #include <string> #include <tuple> #include <vector> +#include "benchmark/benchmark.h" #include "check.h" #include "colorprint.h" #include "commandlineflags.h" +#include "complexity.h" +#include "counter.h" #include "internal_macros.h" #include "string_util.h" #include "timers.h" @@ -156,16 +156,14 @@ void ConsoleReporter::PrintRunData(const Run& result) { const std::size_t cNameLen = std::max(std::string::size_type(10), c.first.length()); auto const& s = HumanReadableNumber(c.second.value, c.second.oneK); + const char* unit = ""; + if (c.second.flags & Counter::kIsRate) + unit = (c.second.flags & Counter::kInvert) ? 
"s" : "/s"; if (output_options_ & OO_Tabular) { - if (c.second.flags & Counter::kIsRate) { - printer(Out, COLOR_DEFAULT, " %*s/s", cNameLen - 2, s.c_str()); - } else { - printer(Out, COLOR_DEFAULT, " %*s", cNameLen, s.c_str()); - } - } else { - const char* unit = (c.second.flags & Counter::kIsRate) ? "/s" : ""; - printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), + printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(), unit); + } else { + printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), unit); } } diff --git a/src/third_party/benchmark/dist/src/counter.cc b/src/third_party/benchmark/dist/src/counter.cc index c248ea110bc..cf5b78ee3ac 100644 --- a/src/third_party/benchmark/dist/src/counter.cc +++ b/src/third_party/benchmark/dist/src/counter.cc @@ -32,6 +32,10 @@ double Finish(Counter const& c, IterationCount iterations, double cpu_time, if (c.flags & Counter::kAvgIterations) { v /= iterations; } + + if (c.flags & Counter::kInvert) { // Invert is *always* last. + v = 1.0 / v; + } return v; } diff --git a/src/third_party/benchmark/dist/src/counter.h b/src/third_party/benchmark/dist/src/counter.h index 1ad46d4940e..1f5a58e31f0 100644 --- a/src/third_party/benchmark/dist/src/counter.h +++ b/src/third_party/benchmark/dist/src/counter.h @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#ifndef BENCHMARK_COUNTER_H_ +#define BENCHMARK_COUNTER_H_ + #include "benchmark/benchmark.h" namespace benchmark { @@ -25,3 +28,5 @@ bool SameNames(UserCounters const& l, UserCounters const& r); } // end namespace internal } // end namespace benchmark + +#endif // BENCHMARK_COUNTER_H_ diff --git a/src/third_party/benchmark/dist/src/cycleclock.h b/src/third_party/benchmark/dist/src/cycleclock.h index f5e37b011b9..179c67cd614 100644 --- a/src/third_party/benchmark/dist/src/cycleclock.h +++ b/src/third_party/benchmark/dist/src/cycleclock.h @@ -84,13 +84,21 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { return (high << 32) | low; #elif defined(__powerpc__) || defined(__ppc__) // This returns a time-base, which is not always precisely a cycle-count. - int64_t tbl, tbu0, tbu1; - asm("mftbu %0" : "=r"(tbu0)); - asm("mftb %0" : "=r"(tbl)); - asm("mftbu %0" : "=r"(tbu1)); - tbl &= -static_cast<int64_t>(tbu0 == tbu1); - // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) - return (tbu1 << 32) | tbl; +#if defined(__powerpc64__) || defined(__ppc64__) + int64_t tb; + asm volatile("mfspr %0, 268" : "=r"(tb)); + return tb; +#else + uint32_t tbl, tbu0, tbu1; + asm volatile( + "mftbu %0\n" + "mftbl %1\n" + "mftbu %2" + : "=r"(tbu0), "=r"(tbl), "=r"(tbu1)); + tbl &= -static_cast<int32_t>(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed) + return (static_cast<uint64_t>(tbu1) << 32) | tbl; +#endif #elif defined(__sparc__) int64_t tick; asm(".byte 0x83, 0x41, 0x00, 0x00"); @@ -164,6 +172,27 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { uint64_t tsc; asm("stck %0" : "=Q"(tsc) : : "cc"); return tsc; +#elif defined(__riscv) // RISC-V + // Use RDCYCLE (and RDCYCLEH on riscv32) +#if __riscv_xlen == 32 + uint32_t cycles_lo, cycles_hi0, cycles_hi1; + // This asm also includes the PowerPC overflow handling strategy, as above. + // Implemented in assembly because Clang insisted on branching. 
+ asm volatile( + "rdcycleh %0\n" + "rdcycle %1\n" + "rdcycleh %2\n" + "sub %0, %0, %2\n" + "seqz %0, %0\n" + "sub %0, zero, %0\n" + "and %1, %1, %0\n" + : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1)); + return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo; +#else + uint64_t cycles; + asm volatile("rdcycle %0" : "=r"(cycles)); + return cycles; +#endif #else // The soft failover to a generic implementation is automatic only for ARM. // For other platforms the developer is expected to make an attempt to create diff --git a/src/third_party/benchmark/dist/src/json_reporter.cc b/src/third_party/benchmark/dist/src/json_reporter.cc index 0495d96688c..54f3ecfdbc4 100644 --- a/src/third_party/benchmark/dist/src/json_reporter.cc +++ b/src/third_party/benchmark/dist/src/json_reporter.cc @@ -92,7 +92,7 @@ std::string FormatKV(std::string const& key, double value) { return ss.str(); } -int64_t RoundDouble(double v) { return static_cast<int64_t>(v + 0.5); } +int64_t RoundDouble(double v) { return std::lround(v); } } // end namespace @@ -134,8 +134,10 @@ bool JSONReporter::ReportContext(const Context& context) { << FormatKV("mhz_per_cpu", RoundDouble(info.cycles_per_second / 1000000.0)) << ",\n"; - out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled) - << ",\n"; + if (CPUInfo::Scaling::UNKNOWN != info.scaling) { + out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? 
true : false) + << ",\n"; + } out << indent << "\"caches\": [\n"; indent = std::string(6, ' '); @@ -147,7 +149,7 @@ bool JSONReporter::ReportContext(const Context& context) { out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level)) << ",\n"; out << cache_indent - << FormatKV("size", static_cast<int64_t>(CI.size) * 1000u) << ",\n"; + << FormatKV("size", static_cast<int64_t>(CI.size)) << ",\n"; out << cache_indent << FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing)) << "\n"; diff --git a/src/third_party/benchmark/dist/src/mutex.h b/src/third_party/benchmark/dist/src/mutex.h index 5f461d05a0c..3fac79aea41 100644 --- a/src/third_party/benchmark/dist/src/mutex.h +++ b/src/third_party/benchmark/dist/src/mutex.h @@ -71,7 +71,7 @@ typedef std::condition_variable Condition; // NOTE: Wrappers for std::mutex and std::unique_lock are provided so that // we can annotate them with thread safety attributes and use the // -Wthread-safety warning with clang. The standard library types cannot be -// used directly because they do not provided the required annotations. +// used directly because they do not provide the required annotations. 
class CAPABILITY("mutex") Mutex { public: Mutex() {} diff --git a/src/third_party/benchmark/dist/src/reporter.cc b/src/third_party/benchmark/dist/src/reporter.cc index 4d3e477d44a..337575a1187 100644 --- a/src/third_party/benchmark/dist/src/reporter.cc +++ b/src/third_party/benchmark/dist/src/reporter.cc @@ -49,7 +49,7 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, Out << "CPU Caches:\n"; for (auto &CInfo : info.caches) { Out << " L" << CInfo.level << " " << CInfo.type << " " - << (CInfo.size / 1000) << "K"; + << (CInfo.size / 1024) << " KiB"; if (CInfo.num_sharing != 0) Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")"; Out << "\n"; @@ -64,7 +64,7 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, Out << "\n"; } - if (info.scaling_enabled) { + if (CPUInfo::Scaling::ENABLED == info.scaling) { Out << "***WARNING*** CPU scaling is enabled, the benchmark " "real time measurements may be noisy and will incur extra " "overhead.\n"; diff --git a/src/third_party/benchmark/dist/src/string_util.cc b/src/third_party/benchmark/dist/src/string_util.cc index 39b01a1719a..ac60b5588f0 100644 --- a/src/third_party/benchmark/dist/src/string_util.cc +++ b/src/third_party/benchmark/dist/src/string_util.cc @@ -1,6 +1,9 @@ #include "string_util.h" #include <array> +#ifdef BENCHMARK_STL_ANDROID_GNUSTL +#include <cerrno> +#endif #include <cmath> #include <cstdarg> #include <cstdio> diff --git a/src/third_party/benchmark/dist/src/sysinfo.cc b/src/third_party/benchmark/dist/src/sysinfo.cc index 28126470bad..8bab9320f1d 100644 --- a/src/third_party/benchmark/dist/src/sysinfo.cc +++ b/src/third_party/benchmark/dist/src/sysinfo.cc @@ -57,6 +57,7 @@ #include <memory> #include <sstream> #include <locale> +#include <utility> #include "check.h" #include "cycleclock.h" @@ -209,11 +210,11 @@ bool ReadFromFile(std::string const& fname, ArgT* arg) { return f.good(); } -bool CpuScalingEnabled(int num_cpus) { +CPUInfo::Scaling CpuScaling(int num_cpus) { // We 
don't have a valid CPU count, so don't even bother. - if (num_cpus <= 0) return false; + if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; #ifdef BENCHMARK_OS_QNX - return false; + return CPUInfo::Scaling::UNKNOWN; #endif #ifndef BENCHMARK_OS_WINDOWS // On Linux, the CPUfreq subsystem exposes CPU information as files on the @@ -223,10 +224,11 @@ bool CpuScalingEnabled(int num_cpus) { for (int cpu = 0; cpu < num_cpus; ++cpu) { std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - if (ReadFromFile(governor_file, &res) && res != "performance") return true; + if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED; } + return CPUInfo::Scaling::DISABLED; #endif - return false; + return CPUInfo::Scaling::UNKNOWN; } int CountSetBitsInCPUMap(std::string Val) { @@ -270,7 +272,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() { else if (f && suffix != "K") PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); else if (suffix == "K") - info.size *= 1000; + info.size *= 1024; } if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) PrintErrorAndDie("Failed to read from file ", FPath, "type"); @@ -382,9 +384,11 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesQNX() { case CACHE_FLAG_UNIFIED : info.type = "Unified"; info.level = 2; + break; case CACHE_FLAG_SHARED : info.type = "Shared"; info.level = 3; + break; default : continue; break; @@ -429,11 +433,20 @@ std::string GetSystemName() { #endif return str; #else // defined(BENCHMARK_OS_WINDOWS) +#ifndef HOST_NAME_MAX #ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 +#elif defined(BENCHMARK_OS_NACL) +#define HOST_NAME_MAX 64 #elif defined(BENCHMARK_OS_QNX) #define HOST_NAME_MAX 154 +#elif defined(BENCHMARK_OS_RTEMS) +#define HOST_NAME_MAX 256 +#else +#warning "HOST_NAME_MAX not defined. 
using 64" +#define HOST_NAME_MAX 64 #endif +#endif // def HOST_NAME_MAX char hostname[HOST_NAME_MAX]; int retVal = gethostname(hostname, HOST_NAME_MAX); if (retVal != 0) return std::string(""); @@ -686,7 +699,7 @@ CPUInfo::CPUInfo() : num_cpus(GetNumCPUs()), cycles_per_second(GetCPUCyclesPerSecond()), caches(GetCacheSizes()), - scaling_enabled(CpuScalingEnabled(num_cpus)), + scaling(CpuScaling(num_cpus)), load_avg(GetLoadAvg()) {} diff --git a/src/third_party/benchmark/dist/src/thread_manager.h b/src/third_party/benchmark/dist/src/thread_manager.h index 1720281f0a1..28e2dd53aff 100644 --- a/src/third_party/benchmark/dist/src/thread_manager.h +++ b/src/third_party/benchmark/dist/src/thread_manager.h @@ -11,7 +11,7 @@ namespace internal { class ThreadManager { public: - ThreadManager(int num_threads) + explicit ThreadManager(int num_threads) : alive_threads_(num_threads), start_stop_barrier_(num_threads) {} Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) { diff --git a/src/third_party/benchmark/dist/src/thread_timer.h b/src/third_party/benchmark/dist/src/thread_timer.h index fbd298d3bd4..1703ca0d6f8 100644 --- a/src/third_party/benchmark/dist/src/thread_timer.h +++ b/src/third_party/benchmark/dist/src/thread_timer.h @@ -43,19 +43,19 @@ class ThreadTimer { bool running() const { return running_; } // REQUIRES: timer is not running - double real_time_used() { + double real_time_used() const { CHECK(!running_); return real_time_used_; } // REQUIRES: timer is not running - double cpu_time_used() { + double cpu_time_used() const { CHECK(!running_); return cpu_time_used_; } // REQUIRES: timer is not running - double manual_time_used() { + double manual_time_used() const { CHECK(!running_); return manual_time_used_; } diff --git a/src/third_party/benchmark/dist/src/timers.cc b/src/third_party/benchmark/dist/src/timers.cc index 7613ff92c6e..4f76eddc1d3 100644 --- a/src/third_party/benchmark/dist/src/timers.cc +++ 
b/src/third_party/benchmark/dist/src/timers.cc @@ -178,40 +178,67 @@ double ThreadCPUUsage() { #endif } -namespace { - -std::string DateTimeString(bool local) { +std::string LocalDateTimeString() { + // Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM. typedef std::chrono::system_clock Clock; std::time_t now = Clock::to_time_t(Clock::now()); - const std::size_t kStorageSize = 128; - char storage[kStorageSize]; - std::size_t written; + const std::size_t kTzOffsetLen = 6; + const std::size_t kTimestampLen = 19; + + std::size_t tz_len; + std::size_t timestamp_len; + long int offset_minutes; + char tz_offset_sign = '+'; + // Long enough buffers to avoid format-overflow warnings + char tz_offset[128]; + char storage[128]; - if (local) { #if defined(BENCHMARK_OS_WINDOWS) - written = - std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); + std::tm *timeinfo_p = ::localtime(&now); #else - std::tm timeinfo; - ::localtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); + std::tm timeinfo; + std::tm *timeinfo_p = &timeinfo; + ::localtime_r(&now, &timeinfo); #endif + + tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p); + + if (tz_len < kTzOffsetLen && tz_len > 1) { + // Timezone offset was written. strftime writes offset as +HHMM or -HHMM, + // RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse + // the offset as an integer, then reprint it to a string. + + offset_minutes = ::strtol(tz_offset, NULL, 10); + if (offset_minutes < 0) { + offset_minutes *= -1; + tz_offset_sign = '-'; + } + + tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", + tz_offset_sign, offset_minutes / 100, offset_minutes % 100); + CHECK(tz_len == kTzOffsetLen); + ((void)tz_len); // Prevent unused variable warning in optimized build. } else { + // Unknown offset. RFC3339 specifies that unknown local offsets should be + // written as UTC time with -00:00 timezone. 
#if defined(BENCHMARK_OS_WINDOWS) - written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); + // Potential race condition if another thread calls localtime or gmtime. + timeinfo_p = ::gmtime(&now); #else - std::tm timeinfo; ::gmtime_r(&now, &timeinfo); - written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); #endif + + strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); } - CHECK(written < kStorageSize); - ((void)written); // prevent unused variable in optimized mode. - return std::string(storage); -} -} // end namespace + timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", + timeinfo_p); + CHECK(timestamp_len == kTimestampLen); + // Prevent unused variable warning in optimized build. + ((void)kTimestampLen); -std::string LocalDateTimeString() { return DateTimeString(true); } + std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1); + return std::string(storage); +} } // end namespace benchmark diff --git a/src/third_party/benchmark/scripts/import.sh b/src/third_party/benchmark/scripts/import.sh index 627c496cdbf..84957135857 100755 --- a/src/third_party/benchmark/scripts/import.sh +++ b/src/third_party/benchmark/scripts/import.sh @@ -7,7 +7,7 @@ IFS=$'\n\t' NAME="benchmark" -LIB_GIT_REV="mongo/v1.5.0" +LIB_GIT_REV="mongo/v1.5.2" LIB_GIT_URL="https://github.com/mongodb-forks/benchmark.git" LIB_GIT_DIR=$(mktemp -d /tmp/import-benchmark.XXXXXX) |