From 9a11ae2cc9b20d6f570f5a3e410354902ef818b2 Mon Sep 17 00:00:00 2001
From: Frank Harrison
Date: Sun, 26 Apr 2020 21:07:00 +0100
Subject: benchmark| Potential solution for performance regressions (#3473)

* benchmark| Add benchmarking option to tox

* benchmark| Adds basic performance benchmark baselines for pylint

Here we establish baseline benchmarks for the system when used in a minimal
way. We just confirm that -j1 vs -jN gives some boost in performance under
simple situations, establishing a baseline for other benchmarks.

Co-authored-by: Pierre Sassoulas
---
 tests/benchmark/test_baseline_benchmarks.py | 322 ++++++++++++++++++++++++++++
 tests/input/benchmark_minimal_file.py       |   0
 tox.ini                                     |  32 ++-
 3 files changed, 352 insertions(+), 2 deletions(-)
 create mode 100644 tests/benchmark/test_baseline_benchmarks.py
 create mode 100644 tests/input/benchmark_minimal_file.py

diff --git a/tests/benchmark/test_baseline_benchmarks.py b/tests/benchmark/test_baseline_benchmarks.py
new file mode 100644
index 000000000..1984ef72a
--- /dev/null
+++ b/tests/benchmark/test_baseline_benchmarks.py
@@ -0,0 +1,322 @@
+""" Profiles basic -jX functionality """
+# Copyright (c) 2020 Frank Harrison
+
+# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
+
+# pylint: disable=protected-access,missing-function-docstring,no-self-use
+
+import os
+import pprint
+import time
+
+import pytest
+
+import pylint.interfaces
+from pylint.checkers.base_checker import BaseChecker
+from pylint.lint import PyLinter, Run, check_parallel
+from pylint.testutils import TestReporter as Reporter
+from pylint.utils import register_plugins
+
+
+def _empty_filepath():
+    return os.path.abspath(
+        os.path.join(
+            os.path.dirname(__file__), "..", "input", "benchmark_minimal_file.py"
+        )
+    )
+
+
+class SleepingChecker(BaseChecker):
+    """ A checker that sleeps; the wall-clock time should reduce as we add workers
+
+    As we apply a roughly constant amount of "work" in this checker, any variance is
+    likely to be caused by the pylint system. """
+
+    __implements__ = (pylint.interfaces.IRawChecker,)
+
+    name = "sleeper"
+    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
+    sleep_duration = 0.5  # the time to pretend we're doing work for
+
+    def process_module(self, _astroid):
+        """ Sleeps for `sleep_duration` on each call
+
+        This effectively means each file costs ~`sleep_duration`+framework overhead """
+        time.sleep(self.sleep_duration)
+
+
+class SleepingCheckerLong(BaseChecker):
+    """ A checker that sleeps; the wall-clock time should reduce as we add workers
+
+    As we apply a roughly constant amount of "work" in this checker, any variance is
+    likely to be caused by the pylint system. """
+
+    __implements__ = (pylint.interfaces.IRawChecker,)
+
+    name = "long-sleeper"
+    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
+    sleep_duration = 0.5  # the time to pretend we're doing work for
+
+    def process_module(self, _astroid):
+        """ Sleeps for `sleep_duration` on each call
+
+        This effectively means each file costs ~`sleep_duration`+framework overhead """
+        time.sleep(self.sleep_duration)
+
+
+class NoWorkChecker(BaseChecker):
+    """ A checker that does no work; wall-clock time should only reflect framework
+    overhead as we add workers """
+
+    __implements__ = (pylint.interfaces.IRawChecker,)
+
+    name = "sleeper"
+    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
+
+    def process_module(self, _astroid):
+        pass
+
+
+@pytest.mark.benchmark(group="baseline",)
+class TestEstablishBaselineBenchmarks:
+    """ Naive benchmarks for the high-level pylint framework
+
+    Because this benchmarks the fundamental and common parts of pylint, any changes
+    seen here will impact everything else """
+
+    empty_filepath = _empty_filepath()
+    empty_file_info = (
+        "name-emptyfile-file",
+        _empty_filepath(),
+        "modname-emptyfile-mod",
+    )
+    lot_of_files = 500
+
+    def test_baseline_benchmark_j1(self, benchmark):
+        """ Establish a baseline of pylint performance with no work
+
+        We will add extra Checkers in other benchmarks.
+
+        Because this is so simple, if this regresses, something very serious has
+        happened """
+        linter = PyLinter(reporter=Reporter())
+        fileinfos = [self.empty_filepath]  # Single file to end-to-end the system
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 1, "Should just have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j10(self, benchmark):
+        """ Establish a baseline of pylint performance with no work across threads
+
+        Same as `test_baseline_benchmark_j1` but we use -j10 with 10 fake files to
+        ensure the end-to-end system is invoked.
+
+        Because this is also so simple, if this regresses, something very serious has
+        happened.
+        """
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+
+        # Create a file per worker, using all workers
+        fileinfos = [self.empty_filepath for _ in range(linter.config.jobs)]
+
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_check_parallel_j10(self, benchmark):
+        """ Should demonstrate times very close to `test_baseline_benchmark_j10` """
+        linter = PyLinter(reporter=Reporter())
+
+        # Create a file per worker, using all workers
+        fileinfos = [self.empty_file_info for _ in range(linter.config.jobs)]
+
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(check_parallel, linter, jobs=10, files=fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j1(self, benchmark):
+        """ Establish a baseline with only the 'master' checker being run in -j1
+
+        We do not register any checkers except the default 'master', so the cost is
+        just that of the system with a lot of files registered """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test if profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 1
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j10(self, benchmark):
+        """ Establish a baseline with only the 'master' checker being run in -j10
+
+        As with the -j1 variant above, `test_baseline_lots_of_files_j1`, we do not
+        register any checkers except the default 'master', so the cost is just that of
+        the check_parallel system across 10 workers, plus the overhead of PyLinter """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test if profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j1_empty_checker(self, benchmark):
+        """ Baselines pylint for a single extra checker being run in -j1, for N-files
+
+        We use a checker that does no work, so the cost is just that of the system at
+        scale """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test if profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 1
+        linter.register_checker(NoWorkChecker(linter))
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j10_empty_checker(self, benchmark):
+        """ Baselines pylint for a single extra checker being run in -j10, for N-files
+
+        We use a checker that does no work, so the cost is just that of the system at
+        scale, across workers """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test if profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+        linter.register_checker(NoWorkChecker(linter))
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j1_single_working_checker(self, benchmark):
+        """ Establish a baseline of single-worker performance for PyLinter
+
+        Here we mimic a single Checker that does some work so that we can see the
+        impact of running a simple system with -j1 against the same system with -j10.
+
+        We expect this benchmark to take very close to
+        `numfiles*SleepingChecker.sleep_duration` """
+        if benchmark.disabled:
+            benchmark(print, "skipping, do not want to sleep in main tests")
+            return  # _only_ run this test if profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.register_checker(SleepingChecker(linter))
+
+        # Check the same number of files as
+        # `test_baseline_benchmark_j10_single_working_checker`
+        fileinfos = [self.empty_filepath for _ in range(10)]
+
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j10_single_working_checker(self, benchmark):
+        """ Establishes a baseline of multi-worker performance for PyLinter/check_parallel
+
+        We expect this benchmark to take less time than `test_baseline_benchmark_j1`,
+        roughly `error_margin*(1/J)*(numfiles*SleepingChecker.sleep_duration)`.
+
+        Because of the cost of the framework and system, the performance difference
+        will *not* be 1/10 of the -j1 version. """
+        if benchmark.disabled:
+            benchmark(print, "skipping, do not want to sleep in main tests")
+            return  # _only_ run this test if profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+        linter.register_checker(SleepingChecker(linter))
+
+        # Check the same number of files as
+        # `test_baseline_benchmark_j1_single_working_checker`
+        fileinfos = [self.empty_filepath for _ in range(10)]
+
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j1_all_checks_single_file(self, benchmark):
+        """ Runs a single file, with -j1, against all plug-ins
+
+        ... that's the intent at least.
+        """
+        # Just 1 file, but all Checkers/Extensions
+        fileinfos = [self.empty_filepath]
+
+        runner = benchmark(Run, fileinfos, reporter=Reporter(), do_exit=False)
+        assert runner.linter.config.jobs == 1
+        print("len(runner.linter._checkers)", len(runner.linter._checkers))
+        assert len(runner.linter._checkers) > 1, "Should have more than 'master'"
+
+        assert runner.linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(runner.linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j1_all_checks_lots_of_files(self, benchmark):
+        """ Runs lots of files, with -j1, against all plug-ins
+
+        ... that's the intent at least.
+        """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test if profiling
+        linter = PyLinter()
+
+        # Register all checkers/extensions and enable them
+        register_plugins(
+            linter, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+        )
+        linter.load_default_plugins()
+        linter.enable("all")
+
+        # Lots of files, with all Checkers/Extensions
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+
+        assert linter.config.jobs == 1
+        print("len(linter._checkers)", len(linter._checkers))
+        assert len(linter._checkers) > 1, "Should have more than 'master'"
+        benchmark(linter.check, fileinfos)
diff --git a/tests/input/benchmark_minimal_file.py b/tests/input/benchmark_minimal_file.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tox.ini b/tox.ini
index 6d02a3da5..59d9e4f39 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py35, py36, py37, py38, pypy, pylint
+envlist = py35, py36, py37, py38, pypy, pylint, benchmark
 skip_missing_interpreters = true
 
 [testenv:pylint]
@@ -53,13 +53,15 @@ deps =
     mccabe
     pytest
     pytest-xdist
+    pytest-benchmark
     pytest-profiling
 
 setenv =
     COVERAGE_FILE = {toxinidir}/.coverage.{envname}
 
 commands =
-    python -Wignore -m coverage run -m pytest {toxinidir}/tests/ {posargs:}
+    ; Run tests, ensuring all benchmark tests do not run
+    python -Wignore -m coverage run -m pytest --benchmark-disable {toxinidir}/tests/ {posargs:}
 
     ; Transform absolute path to relative path
     ; for compatibility with coveralls.io and fix 'source not available' error.
@@ -132,3 +134,29 @@ commands =
     rm -f extensions.rst
     python ./exts/pylint_extensions.py
     sphinx-build -W -b html -d _build/doctrees . _build/html
+
+[testenv:benchmark]
+deps =
+    https://github.com/PyCQA/astroid/tarball/master#egg=astroid-master-2.0
+    coverage<5.0
+    isort
+    mccabe
+    pytest
+    pytest-xdist
+    pygal
+    pytest-benchmark
+
+commands =
+    ; Run only the benchmark tests, grouping output and forcing .json output so we
+    ; can compare benchmark runs
+    python -Wi -m pytest --exitfirst \
+        --failed-first \
+        --benchmark-only \
+        --benchmark-save=batch_files \
+        --benchmark-save-data \
+        --benchmark-autosave \
+        {toxinidir}/tests \
+        --benchmark-group-by="group" \
+        {posargs:}
+
+changedir = {toxworkdir}
--
cgit v1.2.1