diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2022-11-20 09:45:01 -0500 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2022-11-20 10:58:33 -0500 |
commit | 1cd6c9bba0b4ba3018bf1b28fee645a7dd98fe68 (patch) | |
tree | 2e5e19653ad2d215a9f02758c18ade54d08f357d | |
parent | 66c45143008366726293a341405d45a3a8e9ed87 (diff) | |
download | python-coveragepy-git-1cd6c9bba0b4ba3018bf1b28fee645a7dd98fe68.tar.gz |
perf: more combine speed-ups
By not writing metadata that differs between runs but doesn't change the data,
we get a higher hit rate on the hash-checking when combining.
Use --debug=process to write these details (the command line and a timestamp) into the data file for debugging.
-rw-r--r-- | CHANGES.rst | 6 | ||||
-rw-r--r-- | coverage/sqldata.py | 21 | ||||
-rw-r--r-- | doc/cmd.rst | 3 | ||||
-rw-r--r-- | doc/dbschema.rst | 5 | ||||
-rw-r--r-- | tests/test_data.py | 20 |
5 files changed, 42 insertions, 13 deletions
diff --git a/CHANGES.rst b/CHANGES.rst index 5e3fb44c..00aa97f0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -31,8 +31,8 @@ Unreleased - Combining data files with ``coverage combine`` now quickly hashes the data files to skip files that provide no new information. This can reduce the - time needed. For coverage.py's own test suite, combining was about 17% - faster. + time needed. Many details affect the results, but for coverage.py's own test + suite, combining was about 40% faster. - When searching for completely unexecuted files, coverage.py uses the presence of ``__init__.py`` files to determine which directories have source that @@ -51,7 +51,7 @@ Unreleased - Fixed a mis-measurement of a strange use of wildcard alternatives in match/case statements, closing `issue 1421`_. -- The ``[run] note`` setting has been completely removed. +- The deprecated ``[run] note`` setting has been completely removed. .. _implicit namespace packages: https://peps.python.org/pep-0420/ .. _issue 1383: https://github.com/nedbat/coveragepy/issues/1383 diff --git a/coverage/sqldata.py b/coverage/sqldata.py index 2fbc53f5..ea6b1199 100644 --- a/coverage/sqldata.py +++ b/coverage/sqldata.py @@ -4,6 +4,7 @@ """SQLite coverage data.""" import collections +import datetime import functools import glob import itertools @@ -51,10 +52,11 @@ CREATE TABLE meta ( key text, value text, unique (key) - -- Keys: + -- Possible keys: -- 'has_arcs' boolean -- Is this data recording branches? -- 'sys_argv' text -- The coverage command line that recorded the data. -- 'version' text -- The version of coverage.py that made the file. + -- 'when' text -- Datetime when the file was created. 
); CREATE TABLE file ( @@ -298,13 +300,18 @@ class CoverageData(SimpleReprMixin): self._debug.write(f"Initing data file {self._filename!r}") db.executescript(SCHEMA) db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,)) - db.executemany( - "insert or ignore into meta (key, value) values (?, ?)", - [ + + # When writing metadata, avoid information that will needlessly change + # the hash of the data file, unless we're debugging processes. + meta_data = [ + ("version", __version__), + ] + if self._debug.should("process"): + meta_data.extend([ ("sys_argv", str(getattr(sys, "argv", None))), - ("version", __version__), - ] - ) + ("when", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")), + ]) + db.executemany("insert or ignore into meta (key, value) values (?, ?)", meta_data) def _connect(self): """Get the SqliteDb object to use.""" diff --git a/doc/cmd.rst b/doc/cmd.rst index be323c0c..919b6d88 100644 --- a/doc/cmd.rst +++ b/doc/cmd.rst @@ -1025,7 +1025,8 @@ of operation to log: * ``plugin``: print information about plugin operations. * ``process``: show process creation information, and changes in the current - directory. + directory. This also writes a timestamp and command arguments into the data + file. * ``pybehave``: show the values of `internal flags <env.py_>`_ describing the behavior of the current version of Python. diff --git a/doc/dbschema.rst b/doc/dbschema.rst index 42e616d9..b576acaa 100644 --- a/doc/dbschema.rst +++ b/doc/dbschema.rst @@ -66,10 +66,11 @@ This is the database schema: key text, value text, unique (key) - -- Keys: + -- Possible keys: -- 'has_arcs' boolean -- Is this data recording branches? -- 'sys_argv' text -- The coverage command line that recorded the data. -- 'version' text -- The version of coverage.py that made the file. + -- 'when' text -- Datetime when the file was created. ); CREATE TABLE file ( @@ -115,7 +116,7 @@ This is the database schema: foreign key (file_id) references file (id) ); -.. 
[[[end]]] (checksum: 9d87794485a9aa6d9064b735972a3447) +.. [[[end]]] (checksum: 6a04d14b07f08f86cccf43056328dcb7) .. _numbits: diff --git a/tests/test_data.py b/tests/test_data.py index b1a215e2..79c90420 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -904,6 +904,26 @@ class CoverageDataFilesTest(CoverageTest): assert_measured_files(covdata3, MEASURED_FILES_1_2) self.assert_file_count(glob.escape(basename) + ".*", 0) + def test_meta_data(self): + # The metadata written to the data file shouldn't interfere with + # hashing to remove duplicates, except for debug=process, which + # writes debugging info as metadata. + debug = DebugControlString(options=[]) + covdata1 = CoverageData(basename="meta.1", debug=debug) + covdata1.add_lines(LINES_1) + covdata1.write() + with sqlite3.connect("meta.1") as con: + data = sorted(k for (k,) in con.execute("select key from meta")) + assert data == ["has_arcs", "version"] + + debug = DebugControlString(options=["process"]) + covdata2 = CoverageData(basename="meta.2", debug=debug) + covdata2.add_lines(LINES_1) + covdata2.write() + with sqlite3.connect("meta.2") as con: + data = sorted(k for (k,) in con.execute("select key from meta")) + assert data == ["has_arcs", "sys_argv", "version", "when"] + class DumpsLoadsTest(CoverageTest): """Tests of CoverageData.dumps and loads.""" |