summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ned Batchelder <ned@nedbatchelder.com> 2021-08-15 08:26:36 -0400
committer Ned Batchelder <ned@nedbatchelder.com> 2021-08-15 08:26:36 -0400
commit db235732dd9a0198f6e5d00b895baa516221fee2 (patch)
tree 478b50037f9fdbef2fc570c3dbad899987518bfa
parent f6d3e88ba5b2dab1720281885c99cdf3ce2844bc (diff)
download python-coveragepy-git-db235732dd9a0198f6e5d00b895baa516221fee2.tar.gz
refactor: use sets to collect data
Coverage.py predates sets as a built-in data structure, so the file data collection has long been dicts with None as the values. Now that sets are available to us (since Python 2.4 in 2004, which coverage.py dropped support for in 2014!), we use sets.
-rw-r--r-- coverage/ctracer/datastack.h 2
-rw-r--r-- coverage/ctracer/tracer.c 6
-rw-r--r-- coverage/ctracer/tracer.h 9
-rw-r--r-- coverage/pytracer.py 30
-rw-r--r-- coverage/sqldata.py 9
-rw-r--r-- tests/test_data.py 66
6 files changed, 60 insertions, 62 deletions
diff --git a/coverage/ctracer/datastack.h b/coverage/ctracer/datastack.h
index 3b3078ba..c383e1e1 100644
--- a/coverage/ctracer/datastack.h
+++ b/coverage/ctracer/datastack.h
@@ -12,7 +12,7 @@
* possible.
*/
typedef struct DataStackEntry {
- /* The current file_data dictionary. Owned. */
+ /* The current file_data set. Owned. */
PyObject * file_data;
/* The disposition object for this frame. A borrowed instance of CFileDisposition. */
diff --git a/coverage/ctracer/tracer.c b/coverage/ctracer/tracer.c
index a3daacb6..00d9f106 100644
--- a/coverage/ctracer/tracer.c
+++ b/coverage/ctracer/tracer.c
@@ -182,7 +182,7 @@ CTracer_record_pair(CTracer *self, int l1, int l2)
goto error;
}
- if (PyDict_SetItem(self->pcur_entry->file_data, t, Py_None) < 0) {
+ if (PySet_Add(self->pcur_entry->file_data, t) < 0) {
goto error;
}
@@ -504,7 +504,7 @@ CTracer_handle_call(CTracer *self, PyFrameObject *frame)
if (PyErr_Occurred()) {
goto error;
}
- file_data = PyDict_New();
+ file_data = PySet_New(NULL);
if (file_data == NULL) {
goto error;
}
@@ -674,7 +674,7 @@ CTracer_handle_line(CTracer *self, PyFrameObject *frame)
goto error;
}
- ret2 = PyDict_SetItem(self->pcur_entry->file_data, this_line, Py_None);
+ ret2 = PySet_Add(self->pcur_entry->file_data, this_line);
Py_DECREF(this_line);
if (ret2 < 0) {
goto error;
diff --git a/coverage/ctracer/tracer.h b/coverage/ctracer/tracer.h
index 8994a9e3..fbbfa202 100644
--- a/coverage/ctracer/tracer.h
+++ b/coverage/ctracer/tracer.h
@@ -39,15 +39,14 @@ typedef struct CTracer {
PyObject * context;
/*
- The data stack is a stack of dictionaries. Each dictionary collects
+ The data stack is a stack of sets. Each set collects
data for a single source file. The data stack parallels the call stack:
each call pushes the new frame's file data onto the data stack, and each
return pops file data off.
- The file data is a dictionary whose form depends on the tracing options.
- If tracing arcs, the keys are line number pairs. If not tracing arcs,
- the keys are line numbers. In both cases, the value is irrelevant
- (None).
+ The file data is a set whose form depends on the tracing options.
+ If tracing arcs, the values are line number pairs. If not tracing arcs,
+ the values are line numbers.
*/
DataStack data_stack; /* Used if we aren't doing concurrency. */
diff --git a/coverage/pytracer.py b/coverage/pytracer.py
index 540df68c..d4a0b748 100644
--- a/coverage/pytracer.py
+++ b/coverage/pytracer.py
@@ -48,7 +48,7 @@ class PyTracer:
# The threading module to use, if any.
self.threading = None
- self.cur_file_dict = None
+ self.cur_file_data = None
self.last_line = 0 # int, but uninitialized.
self.cur_file_name = None
self.context = None
@@ -113,7 +113,7 @@ class PyTracer:
self.log(">", f.f_code.co_filename, f.f_lineno, f.f_code.co_name, f.f_trace)
f = f.f_back
sys.settrace(None)
- self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
+ self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
self.data_stack.pop()
)
return None
@@ -121,10 +121,10 @@ class PyTracer:
if self.last_exc_back:
if frame == self.last_exc_back:
# Someone forgot a return event.
- if self.trace_arcs and self.cur_file_dict:
+ if self.trace_arcs and self.cur_file_data:
pair = (self.last_line, -self.last_exc_firstlineno)
- self.cur_file_dict[pair] = None
- self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
+ self.cur_file_data.add(pair)
+ self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
self.data_stack.pop()
)
self.last_exc_back = None
@@ -150,7 +150,7 @@ class PyTracer:
self._activity = True
self.data_stack.append(
(
- self.cur_file_dict,
+ self.cur_file_data,
self.cur_file_name,
self.last_line,
self.started_context,
@@ -163,12 +163,12 @@ class PyTracer:
disp = self.should_trace(filename, frame)
self.should_trace_cache[filename] = disp
- self.cur_file_dict = None
+ self.cur_file_data = None
if disp.trace:
tracename = disp.source_filename
if tracename not in self.data:
- self.data[tracename] = {}
- self.cur_file_dict = self.data[tracename]
+ self.data[tracename] = set()
+ self.cur_file_data = self.data[tracename]
# The call event is really a "start frame" event, and happens for
# function calls and re-entering generators. The f_lasti field is
# -1 for calls, and a real offset for generators. Use <0 as the
@@ -179,25 +179,25 @@ class PyTracer:
self.last_line = frame.f_lineno
elif event == 'line':
# Record an executed line.
- if self.cur_file_dict is not None:
+ if self.cur_file_data is not None:
lineno = frame.f_lineno
if self.trace_arcs:
- self.cur_file_dict[(self.last_line, lineno)] = None
+ self.cur_file_data.add((self.last_line, lineno))
else:
- self.cur_file_dict[lineno] = None
+ self.cur_file_data.add(lineno)
self.last_line = lineno
elif event == 'return':
- if self.trace_arcs and self.cur_file_dict:
+ if self.trace_arcs and self.cur_file_data:
# Record an arc leaving the function, but beware that a
# "return" event might just mean yielding from a generator.
# Jython seems to have an empty co_code, so just assume return.
code = frame.f_code.co_code
if (not code) or code[frame.f_lasti] != YIELD_VALUE:
first = frame.f_code.co_firstlineno
- self.cur_file_dict[(self.last_line, -first)] = None
+ self.cur_file_data.add((self.last_line, -first))
# Leaving this function, pop the filename stack.
- self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
+ self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
self.data_stack.pop()
)
# Leaving a context?
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index db3ab73a..3fe5317e 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -450,9 +450,9 @@ class CoverageData(SimpleReprMixin):
def add_lines(self, line_data):
"""Add measured line data.
- `line_data` is a dictionary mapping file names to dictionaries::
+ `line_data` is a dictionary mapping file names to iterables of ints::
- { filename: { lineno: None, ... }, ...}
+ { filename: { line1, line2, ... }, ...}
"""
if self._debug.should('dataop'):
@@ -483,9 +483,10 @@ class CoverageData(SimpleReprMixin):
def add_arcs(self, arc_data):
"""Add measured arc data.
- `arc_data` is a dictionary mapping file names to dictionaries::
+ `arc_data` is a dictionary mapping file names to iterables of pairs of
+ ints::
- { filename: { (l1,l2): None, ... }, ...}
+ { filename: { (l1,l2), (l1,l2), ... }, ...}
"""
if self._debug.should('dataop'):
diff --git a/tests/test_data.py b/tests/test_data.py
index 15b7b418..9b5d3d05 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -24,8 +24,8 @@ from tests.helpers import assert_count_equal
LINES_1 = {
- 'a.py': {1: None, 2: None},
- 'b.py': {3: None},
+ 'a.py': {1, 2},
+ 'b.py': {3},
}
SUMMARY_1 = {'a.py': 2, 'b.py': 1}
MEASURED_FILES_1 = ['a.py', 'b.py']
@@ -33,24 +33,15 @@ A_PY_LINES_1 = [1, 2]
B_PY_LINES_1 = [3]
LINES_2 = {
- 'a.py': {1: None, 5: None},
- 'c.py': {17: None},
+ 'a.py': {1, 5},
+ 'c.py': {17},
}
SUMMARY_1_2 = {'a.py': 3, 'b.py': 1, 'c.py': 1}
MEASURED_FILES_1_2 = ['a.py', 'b.py', 'c.py']
ARCS_3 = {
- 'x.py': {
- (-1, 1): None,
- (1, 2): None,
- (2, 3): None,
- (3, -1): None,
- },
- 'y.py': {
- (-1, 17): None,
- (17, 23): None,
- (23, -1): None,
- },
+ 'x.py': {(-1, 1), (1, 2), (2, 3), (3, -1)},
+ 'y.py': {(-1, 17), (17, 23), (23, -1)},
}
X_PY_ARCS_3 = [(-1, 1), (1, 2), (2, 3), (3, -1)]
Y_PY_ARCS_3 = [(-1, 17), (17, 23), (23, -1)]
@@ -60,15 +51,8 @@ X_PY_LINES_3 = [1, 2, 3]
Y_PY_LINES_3 = [17, 23]
ARCS_4 = {
- 'x.py': {
- (-1, 2): None,
- (2, 5): None,
- (5, -1): None,
- },
- 'z.py': {
- (-1, 1000): None,
- (1000, -1): None,
- },
+ 'x.py': {(-1, 2), (2, 5), (5, -1)},
+ 'z.py': {(-1, 1000), (1000, -1)},
}
SUMMARY_3_4 = {'x.py': 4, 'y.py': 2, 'z.py': 1}
MEASURED_FILES_3_4 = ['x.py', 'y.py', 'z.py']
@@ -103,6 +87,16 @@ class DataTestHelpers(CoverageTest):
assert covdata.has_arcs()
+def dicts_from_sets(file_data):
+ """Convert a dict of sets into a dict of dicts.
+
+ Before 6.0, file data was a dict with None as the values. In 6.0, file
+ data is a set. SqlData all along only cared that it was an iterable.
+ This function helps us test that the old dict format still works.
+ """
+ return {k: dict.fromkeys(v) for k, v in file_data.items()}
+
+
class CoverageDataTest(DataTestHelpers, CoverageTest):
"""Test cases for CoverageData."""
@@ -130,14 +124,16 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
covdata.add_arcs({})
assert not covdata
- def test_adding_lines(self):
+ @pytest.mark.parametrize("lines", [LINES_1, dicts_from_sets(LINES_1)])
+ def test_adding_lines(self, lines):
covdata = CoverageData()
- covdata.add_lines(LINES_1)
+ covdata.add_lines(lines)
self.assert_lines1_data(covdata)
- def test_adding_arcs(self):
+ @pytest.mark.parametrize("arcs", [ARCS_3, dicts_from_sets(ARCS_3)])
+ def test_adding_arcs(self, arcs):
covdata = CoverageData()
- covdata.add_arcs(ARCS_3)
+ covdata.add_arcs(arcs)
self.assert_arcs3_data(covdata)
def test_ok_to_add_lines_twice(self):
@@ -212,20 +208,22 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
covdata.add_lines(LINES_1)
assert covdata.contexts_by_lineno('a.py') == {1: ['test_a'], 2: ['test_a']}
- def test_no_duplicate_lines(self):
+ @pytest.mark.parametrize("lines", [LINES_1, dicts_from_sets(LINES_1)])
+ def test_no_duplicate_lines(self, lines):
covdata = CoverageData()
covdata.set_context("context1")
- covdata.add_lines(LINES_1)
+ covdata.add_lines(lines)
covdata.set_context("context2")
- covdata.add_lines(LINES_1)
+ covdata.add_lines(lines)
assert covdata.lines('a.py') == A_PY_LINES_1
- def test_no_duplicate_arcs(self):
+ @pytest.mark.parametrize("arcs", [ARCS_3, dicts_from_sets(ARCS_3)])
+ def test_no_duplicate_arcs(self, arcs):
covdata = CoverageData()
covdata.set_context("context1")
- covdata.add_arcs(ARCS_3)
+ covdata.add_arcs(arcs)
covdata.set_context("context2")
- covdata.add_arcs(ARCS_3)
+ covdata.add_arcs(arcs)
assert covdata.arcs('x.py') == X_PY_ARCS_3
def test_no_arcs_vs_unmeasured_file(self):