author     Frank Harrison <frank@doublethefish.com>    2020-04-26 21:07:00 +0100
committer  GitHub <noreply@github.com>                 2020-04-26 22:07:00 +0200
commit     9a11ae2cc9b20d6f570f5a3e410354902ef818b2 (patch)
tree       a3d6fecf65ce4b7774a465e1eaa504901b24a4a8
parent     be5a61b13e48a129613e0c659bfd28bf9824f53c (diff)
download   pylint-git-9a11ae2cc9b20d6f570f5a3e410354902ef818b2.tar.gz
benchmark| Potential solution for performance regressions (#3473)
* benchmark| Add benchmarking option to tox

* benchmark| Adds basic performance benchmark baselines for pylint

  Here we establish baseline benchmarks for the system when used in a minimal way.
  We just confirm that -j1 vs -jN gives some boost in performance under simple
  situations, establishing a baseline for other benchmarks.

Co-authored-by: Pierre Sassoulas <pierre.sassoulas@gmail.com>
-rw-r--r--  tests/benchmark/test_baseline_benchmarks.py  322
-rw-r--r--  tests/input/benchmark_minimal_file.py          0
-rw-r--r--  tox.ini                                        32
3 files changed, 352 insertions, 2 deletions
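
For orientation, the tests below are built on pytest-benchmark's `benchmark` fixture; a minimal sketch of that usage pattern (the timed function is illustrative only, not part of this commit):

    import pytest

    @pytest.mark.benchmark(group="baseline")
    def test_sum_baseline(benchmark):
        # The fixture calls the given callable repeatedly, records timing
        # statistics for the run, and returns the callable's return value.
        result = benchmark(sum, range(1000))
        assert result == 499500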
diff --git a/tests/benchmark/test_baseline_benchmarks.py b/tests/benchmark/test_baseline_benchmarks.py
new file mode 100644
index 000000000..1984ef72a
--- /dev/null
+++ b/tests/benchmark/test_baseline_benchmarks.py
@@ -0,0 +1,322 @@
+""" Profiles basic -jX functionality """
+# Copyright (c) 2020 Frank Harrison <doublethefish@gmail.com>
+
+# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
+
+# pylint: disable=protected-access,missing-function-docstring,no-self-use
+
+import os
+import pprint
+import time
+
+import pytest
+
+import pylint.interfaces
+from pylint.checkers.base_checker import BaseChecker
+from pylint.lint import PyLinter, Run, check_parallel
+from pylint.testutils import TestReporter as Reporter
+from pylint.utils import register_plugins
+
+
+def _empty_filepath():
+    return os.path.abspath(
+        os.path.join(
+            os.path.dirname(__file__), "..", "input", "benchmark_minimal_file.py"
+        )
+    )
+
+
+class SleepingChecker(BaseChecker):
+    """ A checker that sleeps; the wall-clock time should reduce as we add workers
+
+    As we apply a roughly constant amount of "work" in this checker, any variance is
+    likely to be caused by the pylint system. """
+
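+    # IRawChecker: process_module() is called once for each module being linted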
+    __implements__ = (pylint.interfaces.IRawChecker,)
+
+    name = "sleeper"
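+    # msgs maps a message id to a (template, symbol, description) triple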
+    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
+    sleep_duration = 0.5  # the time to pretend we're doing work for
+
+    def process_module(self, _astroid):
+        """ Sleeps for `sleep_duration` on each call
+
+        This effectively means each file costs ~`sleep_duration` + framework overhead """
+        time.sleep(self.sleep_duration)
+
+
+class SleepingCheckerLong(BaseChecker):
+    """ A checker that sleeps; the wall-clock time should reduce as we add workers
+
+    As we apply a roughly constant amount of "work" in this checker, any variance is
+    likely to be caused by the pylint system. """
+
+    __implements__ = (pylint.interfaces.IRawChecker,)
+
+    name = "long-sleeper"
+    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
+    sleep_duration = 0.5  # the time to pretend we're doing work for
+
+    def process_module(self, _astroid):
+        """ Sleeps for `sleep_duration` on each call
+
+        This effectively means each file costs ~`sleep_duration` + framework overhead """
+        time.sleep(self.sleep_duration)
+
+
+class NoWorkChecker(BaseChecker):
+    """ A checker that does no work, so any wall-clock change as we add workers
+    reflects the overhead of the pylint framework itself """
+
+    __implements__ = (pylint.interfaces.IRawChecker,)
+
+    name = "sleeper"
+    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
+
+    def process_module(self, _astroid):
+        pass
+
+
+@pytest.mark.benchmark(group="baseline",)
+class TestEstablishBaselineBenchmarks:
+    """ Naive benchmarks for the high-level pylint framework
+
+    Because this benchmarks the fundamental and common parts, changes seen here will
+    impact everything else """
+
+    empty_filepath = _empty_filepath()
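+    # A (name, filepath, modname) triple in the shape check_parallel expects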
+    empty_file_info = (
+        "name-emptyfile-file",
+        _empty_filepath(),
+        "modname-emptyfile-mod",
+    )
+    lot_of_files = 500
+
+    def test_baseline_benchmark_j1(self, benchmark):
+        """ Establish a baseline of pylint performance with no work
+
+        We will add extra Checkers in other benchmarks.
+
+        Because this is so simple, if this regresses, something very serious has
+        happened
+        """
+        linter = PyLinter(reporter=Reporter())
+        fileinfos = [self.empty_filepath]  # Single file to end-to-end the system
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 1, "Should just have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j10(self, benchmark):
+        """ Establish a baseline of pylint performance with no work across threads
+
+        Same as `test_baseline_benchmark_j1` but we use -j10 with 10 fake files to
+        ensure the end-to-end system is invoked.
+
+        Because this is also so simple, if this regresses, something very serious has
+        happened.
+        """
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+
+        # Create a file per worker, using all workers
+        fileinfos = [self.empty_filepath for _ in range(linter.config.jobs)]
+
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_check_parallel_j10(self, benchmark):
+        """ Should demonstrate times very close to `test_baseline_benchmark_j10` """
+        linter = PyLinter(reporter=Reporter())
+
+        # Create a file per worker, using all workers
+        fileinfos = [self.empty_file_info for _ in range(linter.config.jobs)]
+
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(check_parallel, linter, jobs=10, files=fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j1(self, benchmark):
+        """ Establish a baseline with only 'master' checker being run in -j1
+
+        We do not register any checkers except the default 'master', so the cost is
+        just that of the system with a lot of files registered """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test when profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 1
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j10(self, benchmark):
+        """ Establish a baseline with only 'master' checker being run in -j10
+
+        As with the -j1 variant above (`test_baseline_lots_of_files_j1`), we do not
+        register any checkers except the default 'master', so the cost is just that of
+        the check_parallel system across 10 workers, plus the overhead of PyLinter """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test when profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 1, "Should have 'master'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j1_empty_checker(self, benchmark):
+        """ Baselines pylint for a single extra checker being run in -j1, for N files
+
+        We use a checker that does no work, so the cost is just that of the system at
+        scale """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test when profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 1
+        linter.register_checker(NoWorkChecker(linter))
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_lots_of_files_j10_empty_checker(self, benchmark):
+        """ Baselines pylint for a single extra checker being run in -j10, for N files
+
+        We use a checker that does no work, so the cost is just that of the system at
+        scale, across workers """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test when profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+        linter.register_checker(NoWorkChecker(linter))
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j1_single_working_checker(self, benchmark):
+        """ Establish a baseline of single-worker performance for PyLinter
+
+        Here we mimic a single Checker that does some work, so that we can see the
+        impact of running a simple system with -j1 against the same system with -j10.
+
+        We expect this benchmark to take close to
+        `numfiles * SleepingChecker.sleep_duration` seconds """
+        if benchmark.disabled:
+            benchmark(print, "skipping, do not want to sleep in main tests")
+            return  # _only_ run this test when profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.register_checker(SleepingChecker(linter))
+
+        # Check the same number of files as
+        # `test_baseline_benchmark_j10_single_working_checker`
+        fileinfos = [self.empty_filepath for _ in range(10)]
+
+        assert linter.config.jobs == 1
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j10_single_working_checker(self, benchmark):
+        """ Establishes baseline of multi-worker performance for PyLinter/check_parallel
+
+        We expect this benchmark to take less time than test_baseline_benchmark_j1,
+        roughly `error_margin * (1/J) * (numfiles * SleepingChecker.sleep_duration)`
+
+        Because of the cost of the framework and system, the performance difference
+        will *not* be 1/10 of the -j1 version. """
+        if benchmark.disabled:
+            benchmark(print, "skipping, do not want to sleep in main tests")
+            return  # _only_ run this test when profiling
+        linter = PyLinter(reporter=Reporter())
+        linter.config.jobs = 10
+        linter.register_checker(SleepingChecker(linter))
+
+        # Check the same number of files as
+        # `test_baseline_benchmark_j1_single_working_checker`
+        fileinfos = [self.empty_filepath for _ in range(10)]
+
+        assert linter.config.jobs == 10
+        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
+        benchmark(linter.check, fileinfos)
+        assert linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j1_all_checks_single_file(self, benchmark):
+        """ Runs a single file, with -j1, against all plug-ins
+
+        ... that's the intent at least.
+        """
+        # Just 1 file, but all Checkers/Extensions
+        fileinfos = [self.empty_filepath]
+
+        runner = benchmark(Run, fileinfos, reporter=Reporter(), do_exit=False)
+        assert runner.linter.config.jobs == 1
+        print("len(runner.linter._checkers)", len(runner.linter._checkers))
+        assert len(runner.linter._checkers) > 1, "Should have more than 'master'"
+
+        assert runner.linter.msg_status == 0, (
+            "Expected no errors to be thrown: %s"
+            % pprint.pformat(runner.linter.reporter.messages)
+        )
+
+    def test_baseline_benchmark_j1_all_checks_lots_of_files(self, benchmark):
+        """ Runs lots of files, with -j1, against all plug-ins
+
+        ... that's the intent at least.
+        """
+        if benchmark.disabled:
+            benchmark(print, "skipping, only benchmark large file counts")
+            return  # _only_ run this test when profiling
+        linter = PyLinter()
+
+        # Register all checkers/extensions and enable them
+        register_plugins(
+            linter, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+        )
+        linter.load_default_plugins()
+        linter.enable("all")
+
+        # Lots of files, but all Checkers/Extensions
+        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
+
+        assert linter.config.jobs == 1
+        print("len(linter._checkers)", len(linter._checkers))
+        assert len(linter._checkers) > 1, "Should have more than 'master'"
+        benchmark(linter.check, fileinfos)
diff --git a/tests/input/benchmark_minimal_file.py b/tests/input/benchmark_minimal_file.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/input/benchmark_minimal_file.py
diff --git a/tox.ini b/tox.ini
index 6d02a3da5..59d9e4f39 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py35, py36, py37, py38, pypy, pylint
+envlist = py35, py36, py37, py38, pypy, pylint, benchmark
skip_missing_interpreters = true
[testenv:pylint]
@@ -53,13 +53,15 @@ deps =
    mccabe
    pytest
    pytest-xdist
+    pytest-benchmark
    pytest-profiling
setenv =
    COVERAGE_FILE = {toxinidir}/.coverage.{envname}
commands =
-    python -Wignore -m coverage run -m pytest {toxinidir}/tests/ {posargs:}
+    ; Run tests, ensuring that no benchmark tests run
+    python -Wignore -m coverage run -m pytest --benchmark-disable {toxinidir}/tests/ {posargs:}
    ; Transform absolute path to relative path
    ; for compatibility with coveralls.io and fix 'source not available' error.
@@ -132,3 +134,29 @@ commands =
    rm -f extensions.rst
    python ./exts/pylint_extensions.py
    sphinx-build -W -b html -d _build/doctrees . _build/html
+
+[testenv:benchmark]
+deps =
+    https://github.com/PyCQA/astroid/tarball/master#egg=astroid-master-2.0
+    coverage<5.0
+    isort
+    mccabe
+    pytest
+    pytest-xdist
+    pygal
+    pytest-benchmark
+
+commands =
+    ; Run only the benchmark tests, grouping output and forcing .json output so we
+    ; can compare benchmark runs
+    python -Wi -m pytest --exitfirst \
+        --failed-first \
+        --benchmark-only \
+        --benchmark-save=batch_files \
+        --benchmark-save-data \
+        --benchmark-autosave \
+        {toxinidir}/tests \
+        --benchmark-group-by="group" \
+        {posargs:}
+
+changedir = {toxworkdir}
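
The benchmark env autosaves each run as JSON (--benchmark-autosave / --benchmark-save-data), and with changedir = {toxworkdir} the saved runs should end up under the tox work dir rather than the repo root. A rough sketch of inspecting a saved run afterwards; the .benchmarks directory layout and JSON keys are assumptions based on pytest-benchmark's default save format, not something defined in this commit:

    import json
    import pathlib

    # pytest-benchmark autosaves runs as .benchmarks/<machine-id>/<counter>_<name>.json
    saved_runs = sorted(pathlib.Path(".benchmarks").rglob("*.json"))
    latest = json.loads(saved_runs[-1].read_text())
    for bench in latest["benchmarks"]:
        stats = bench["stats"]
        print("{:<55} mean={:.4f}s rounds={}".format(
            bench["name"], stats["mean"], stats["rounds"]))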