summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ned Batchelder <ned@nedbatchelder.com>, 2022-11-20 09:45:01 -0500
committer: Ned Batchelder <ned@nedbatchelder.com>, 2022-11-20 10:58:33 -0500
commit: 1cd6c9bba0b4ba3018bf1b28fee645a7dd98fe68 (patch)
tree: 2e5e19653ad2d215a9f02758c18ade54d08f357d
parent: 66c45143008366726293a341405d45a3a8e9ed87 (diff)
download: python-coveragepy-git-1cd6c9bba0b4ba3018bf1b28fee645a7dd98fe68.tar.gz
perf: more combine speed-ups
By avoiding writing metadata that differs but doesn't change the data, we get a higher hit rate on the hash-checking when combining. Use --debug=process to include these details for debugging.
-rw-r--r-- CHANGES.rst 6
-rw-r--r-- coverage/sqldata.py 21
-rw-r--r-- doc/cmd.rst 3
-rw-r--r-- doc/dbschema.rst 5
-rw-r--r-- tests/test_data.py 20
5 files changed, 42 insertions, 13 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 5e3fb44c..00aa97f0 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -31,8 +31,8 @@ Unreleased
- Combining data files with ``coverage combine`` now quickly hashes the data
files to skip files that provide no new information. This can reduce the
- time needed. For coverage.py's own test suite, combining was about 17%
- faster.
+ time needed. Many details affect the results, but for coverage.py's own test
+ suite, combining was about 40% faster.
- When searching for completely unexecuted files, coverage.py uses the presence
of ``__init__.py`` files to determine which directories have source that
@@ -51,7 +51,7 @@ Unreleased
- Fixed a mis-measurement of a strange use of wildcard alternatives in
match/case statements, closing `issue 1421`_.
-- The ``[run] note`` setting has been completely removed.
+- The deprecated ``[run] note`` setting has been completely removed.
.. _implicit namespace packages: https://peps.python.org/pep-0420/
.. _issue 1383: https://github.com/nedbat/coveragepy/issues/1383
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index 2fbc53f5..ea6b1199 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -4,6 +4,7 @@
"""SQLite coverage data."""
import collections
+import datetime
import functools
import glob
import itertools
@@ -51,10 +52,11 @@ CREATE TABLE meta (
key text,
value text,
unique (key)
- -- Keys:
+ -- Possible keys:
-- 'has_arcs' boolean -- Is this data recording branches?
-- 'sys_argv' text -- The coverage command line that recorded the data.
-- 'version' text -- The version of coverage.py that made the file.
+ -- 'when' text -- Datetime when the file was created.
);
CREATE TABLE file (
@@ -298,13 +300,18 @@ class CoverageData(SimpleReprMixin):
self._debug.write(f"Initing data file {self._filename!r}")
db.executescript(SCHEMA)
db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,))
- db.executemany(
- "insert or ignore into meta (key, value) values (?, ?)",
- [
+
+ # When writing metadata, avoid information that will needlessly change
+ # the hash of the data file, unless we're debugging processes.
+ meta_data = [
+ ("version", __version__),
+ ]
+ if self._debug.should("process"):
+ meta_data.extend([
("sys_argv", str(getattr(sys, "argv", None))),
- ("version", __version__),
- ]
- )
+ ("when", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+ ])
+ db.executemany("insert or ignore into meta (key, value) values (?, ?)", meta_data)
def _connect(self):
"""Get the SqliteDb object to use."""
diff --git a/doc/cmd.rst b/doc/cmd.rst
index be323c0c..919b6d88 100644
--- a/doc/cmd.rst
+++ b/doc/cmd.rst
@@ -1025,7 +1025,8 @@ of operation to log:
* ``plugin``: print information about plugin operations.
* ``process``: show process creation information, and changes in the current
- directory.
+ directory. This also writes a timestamp and command arguments into the data
+ file.
* ``pybehave``: show the values of `internal flags <env.py_>`_ describing the
behavior of the current version of Python.
diff --git a/doc/dbschema.rst b/doc/dbschema.rst
index 42e616d9..b576acaa 100644
--- a/doc/dbschema.rst
+++ b/doc/dbschema.rst
@@ -66,10 +66,11 @@ This is the database schema:
key text,
value text,
unique (key)
- -- Keys:
+ -- Possible keys:
-- 'has_arcs' boolean -- Is this data recording branches?
-- 'sys_argv' text -- The coverage command line that recorded the data.
-- 'version' text -- The version of coverage.py that made the file.
+ -- 'when' text -- Datetime when the file was created.
);
CREATE TABLE file (
@@ -115,7 +116,7 @@ This is the database schema:
foreign key (file_id) references file (id)
);
-.. [[[end]]] (checksum: 9d87794485a9aa6d9064b735972a3447)
+.. [[[end]]] (checksum: 6a04d14b07f08f86cccf43056328dcb7)
.. _numbits:
diff --git a/tests/test_data.py b/tests/test_data.py
index b1a215e2..79c90420 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -904,6 +904,26 @@ class CoverageDataFilesTest(CoverageTest):
assert_measured_files(covdata3, MEASURED_FILES_1_2)
self.assert_file_count(glob.escape(basename) + ".*", 0)
+ def test_meta_data(self):
+ # The metadata written to the data file shouldn't interfere with
+ # hashing to remove duplicates, except for debug=process, which
+ # writes debugging info as metadata.
+ debug = DebugControlString(options=[])
+ covdata1 = CoverageData(basename="meta.1", debug=debug)
+ covdata1.add_lines(LINES_1)
+ covdata1.write()
+ with sqlite3.connect("meta.1") as con:
+ data = sorted(k for (k,) in con.execute("select key from meta"))
+ assert data == ["has_arcs", "version"]
+
+ debug = DebugControlString(options=["process"])
+ covdata2 = CoverageData(basename="meta.2", debug=debug)
+ covdata2.add_lines(LINES_1)
+ covdata2.write()
+ with sqlite3.connect("meta.2") as con:
+ data = sorted(k for (k,) in con.execute("select key from meta"))
+ assert data == ["has_arcs", "sys_argv", "version", "when"]
+
class DumpsLoadsTest(CoverageTest):
"""Tests of CoverageData.dumps and loads."""