refactor: use sets to collect data

Coverage.py predates sets as a built-in data structure, so the file data collection has long been dicts with None as the values. Sets are available to us now (since Python 2.4 in 2004, which coverage.py dropped support for in 2014!), so we use sets.
author: Ned Batchelder <ned@nedbatchelder.com> 2021-08-15 08:26:36 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2021-08-15 08:26:36 -0400
commit: db235732dd9a0198f6e5d00b895baa516221fee2 (patch)
tree: 478b50037f9fdbef2fc570c3dbad899987518bfa
parent: f6d3e88ba5b2dab1720281885c99cdf3ce2844bc (diff)
download: python-coveragepy-git-db235732dd9a0198f6e5d00b895baa516221fee2.tar.gz
6 files changed, 60 insertions, 62 deletions
diff --git a/coverage/ctracer/datastack.h b/coverage/ctracer/datastack.h
index 3b3078ba..c383e1e1 100644
--- a/coverage/ctracer/datastack.h
+++ b/coverage/ctracer/datastack.h
@@ -12,7 +12,7 @@
  * possible.
  */
 typedef struct DataStackEntry {
-    /* The current file_data dictionary. Owned. */
+    /* The current file_data set. Owned. */
     PyObject * file_data;
 
     /* The disposition object for this frame. A borrowed instance of CFileDisposition. */
diff --git a/coverage/ctracer/tracer.c b/coverage/ctracer/tracer.c
index a3daacb6..00d9f106 100644
--- a/coverage/ctracer/tracer.c
+++ b/coverage/ctracer/tracer.c
@@ -182,7 +182,7 @@ CTracer_record_pair(CTracer *self, int l1, int l2)
         goto error;
     }
 
-    if (PyDict_SetItem(self->pcur_entry->file_data, t, Py_None) < 0) {
+    if (PySet_Add(self->pcur_entry->file_data, t) < 0) {
         goto error;
     }
 
@@ -504,7 +504,7 @@ CTracer_handle_call(CTracer *self, PyFrameObject *frame)
             if (PyErr_Occurred()) {
                 goto error;
             }
-            file_data = PyDict_New();
+            file_data = PySet_New(NULL);
             if (file_data == NULL) {
                 goto error;
             }
@@ -674,7 +674,7 @@ CTracer_handle_line(CTracer *self, PyFrameObject *frame)
                             goto error;
                         }
 
-                        ret2 = PyDict_SetItem(self->pcur_entry->file_data, this_line, Py_None);
+                        ret2 = PySet_Add(self->pcur_entry->file_data, this_line);
                         Py_DECREF(this_line);
                         if (ret2 < 0) {
                             goto error;
diff --git a/coverage/ctracer/tracer.h b/coverage/ctracer/tracer.h
index 8994a9e3..fbbfa202 100644
--- a/coverage/ctracer/tracer.h
+++ b/coverage/ctracer/tracer.h
@@ -39,15 +39,14 @@ typedef struct CTracer {
     PyObject * context;
 
     /*
-        The data stack is a stack of dictionaries.  Each dictionary collects
+        The data stack is a stack of sets.  Each set collects
         data for a single source file.  The data stack parallels the call stack:
         each call pushes the new frame's file data onto the data stack, and each
         return pops file data off.
 
-        The file data is a dictionary whose form depends on the tracing options.
-        If tracing arcs, the keys are line number pairs.  If not tracing arcs,
-        the keys are line numbers.  In both cases, the value is irrelevant
-        (None).
+        The file data is a set whose form depends on the tracing options.
+        If tracing arcs, the values are line number pairs.  If not tracing arcs,
+        the values are line numbers.
     */
 
     DataStack data_stack;           /* Used if we aren't doing concurrency. */
diff --git a/coverage/pytracer.py b/coverage/pytracer.py
index 540df68c..d4a0b748 100644
--- a/coverage/pytracer.py
+++ b/coverage/pytracer.py
@@ -48,7 +48,7 @@ class PyTracer:
         # The threading module to use, if any.
         self.threading = None
 
-        self.cur_file_dict = None
+        self.cur_file_data = None
         self.last_line = 0          # int, but uninitialized.
         self.cur_file_name = None
         self.context = None
@@ -113,7 +113,7 @@ class PyTracer:
                     self.log(">", f.f_code.co_filename, f.f_lineno, f.f_code.co_name, f.f_trace)
                     f = f.f_back
             sys.settrace(None)
-            self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
+            self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
                 self.data_stack.pop()
             )
             return None
@@ -121,10 +121,10 @@ class PyTracer:
         if self.last_exc_back:
             if frame == self.last_exc_back:
                 # Someone forgot a return event.
-                if self.trace_arcs and self.cur_file_dict:
+                if self.trace_arcs and self.cur_file_data:
                     pair = (self.last_line, -self.last_exc_firstlineno)
-                    self.cur_file_dict[pair] = None
-                self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
+                    self.cur_file_data.add(pair)
+                self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
                     self.data_stack.pop()
                 )
             self.last_exc_back = None
@@ -150,7 +150,7 @@ class PyTracer:
             self._activity = True
             self.data_stack.append(
                 (
-                    self.cur_file_dict,
+                    self.cur_file_data,
                     self.cur_file_name,
                     self.last_line,
                     self.started_context,
@@ -163,12 +163,12 @@ class PyTracer:
                 disp = self.should_trace(filename, frame)
                 self.should_trace_cache[filename] = disp
 
-            self.cur_file_dict = None
+            self.cur_file_data = None
             if disp.trace:
                 tracename = disp.source_filename
                 if tracename not in self.data:
-                    self.data[tracename] = {}
-                self.cur_file_dict = self.data[tracename]
+                    self.data[tracename] = set()
+                self.cur_file_data = self.data[tracename]
             # The call event is really a "start frame" event, and happens for
             # function calls and re-entering generators.  The f_lasti field is
             # -1 for calls, and a real offset for generators.  Use <0 as the
@@ -179,25 +179,25 @@ class PyTracer:
                 self.last_line = frame.f_lineno
         elif event == 'line':
             # Record an executed line.
-            if self.cur_file_dict is not None:
+            if self.cur_file_data is not None:
                 lineno = frame.f_lineno
 
                 if self.trace_arcs:
-                    self.cur_file_dict[(self.last_line, lineno)] = None
+                    self.cur_file_data.add((self.last_line, lineno))
                 else:
-                    self.cur_file_dict[lineno] = None
+                    self.cur_file_data.add(lineno)
                 self.last_line = lineno
         elif event == 'return':
-            if self.trace_arcs and self.cur_file_dict:
+            if self.trace_arcs and self.cur_file_data:
                 # Record an arc leaving the function, but beware that a
                 # "return" event might just mean yielding from a generator.
                 # Jython seems to have an empty co_code, so just assume return.
                 code = frame.f_code.co_code
                 if (not code) or code[frame.f_lasti] != YIELD_VALUE:
                     first = frame.f_code.co_firstlineno
-                    self.cur_file_dict[(self.last_line, -first)] = None
+                    self.cur_file_data.add((self.last_line, -first))
             # Leaving this function, pop the filename stack.
-            self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
+            self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
                 self.data_stack.pop()
             )
             # Leaving a context?
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index db3ab73a..3fe5317e 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -450,9 +450,9 @@ class CoverageData(SimpleReprMixin):
     def add_lines(self, line_data):
         """Add measured line data.
 
-        `line_data` is a dictionary mapping file names to dictionaries::
+        `line_data` is a dictionary mapping file names to iterables of ints::
 
-            { filename: { lineno: None, ... }, ...}
+            { filename: { line1, line2, ... }, ...}
 
         """
         if self._debug.should('dataop'):
@@ -483,9 +483,10 @@ class CoverageData(SimpleReprMixin):
     def add_arcs(self, arc_data):
         """Add measured arc data.
 
-        `arc_data` is a dictionary mapping file names to dictionaries::
+        `arc_data` is a dictionary mapping file names to iterables of pairs of
+        ints::
 
-            { filename: { (l1,l2): None, ... }, ...}
+            { filename: { (l1,l2), (l1,l2), ... }, ...}
 
         """
         if self._debug.should('dataop'):
diff --git a/tests/test_data.py b/tests/test_data.py
index 15b7b418..9b5d3d05 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -24,8 +24,8 @@ from tests.helpers import assert_count_equal
 
 
 LINES_1 = {
-    'a.py': {1: None, 2: None},
-    'b.py': {3: None},
+    'a.py': {1, 2},
+    'b.py': {3},
 }
 SUMMARY_1 = {'a.py': 2, 'b.py': 1}
 MEASURED_FILES_1 = ['a.py', 'b.py']
@@ -33,24 +33,15 @@ A_PY_LINES_1 = [1, 2]
 B_PY_LINES_1 = [3]
 
 LINES_2 = {
-    'a.py': {1: None, 5: None},
-    'c.py': {17: None},
+    'a.py': {1, 5},
+    'c.py': {17},
 }
 SUMMARY_1_2 = {'a.py': 3, 'b.py': 1, 'c.py': 1}
 MEASURED_FILES_1_2 = ['a.py', 'b.py', 'c.py']
 
 ARCS_3 = {
-    'x.py': {
-        (-1, 1): None,
-        (1, 2): None,
-        (2, 3): None,
-        (3, -1): None,
-    },
-    'y.py': {
-        (-1, 17): None,
-        (17, 23): None,
-        (23, -1): None,
-    },
+    'x.py': {(-1, 1), (1, 2), (2, 3), (3, -1)},
+    'y.py': {(-1, 17), (17, 23), (23, -1)},
 }
 X_PY_ARCS_3 = [(-1, 1), (1, 2), (2, 3), (3, -1)]
 Y_PY_ARCS_3 = [(-1, 17), (17, 23), (23, -1)]
@@ -60,15 +51,8 @@ X_PY_LINES_3 = [1, 2, 3]
 Y_PY_LINES_3 = [17, 23]
 
 ARCS_4 = {
-    'x.py': {
-        (-1, 2): None,
-        (2, 5): None,
-        (5, -1): None,
-    },
-    'z.py': {
-        (-1, 1000): None,
-        (1000, -1): None,
-    },
+    'x.py': {(-1, 2), (2, 5), (5, -1)},
+    'z.py': {(-1, 1000), (1000, -1)},
 }
 SUMMARY_3_4 = {'x.py': 4, 'y.py': 2, 'z.py': 1}
 MEASURED_FILES_3_4 = ['x.py', 'y.py', 'z.py']
@@ -103,6 +87,16 @@ class DataTestHelpers(CoverageTest):
         assert covdata.has_arcs()
 
 
+def dicts_from_sets(file_data):
+    """Convert a dict of sets into a dict of dicts.
+
+    Before 6.0, file data was a dict with None as the values.  In 6.0, file
+    data is a set.  SqlData all along only cared that it was an iterable.
+    This function helps us test that the old dict format still works.
+    """
+    return {k: dict.fromkeys(v) for k, v in file_data.items()}
+
+
 class CoverageDataTest(DataTestHelpers, CoverageTest):
     """Test cases for CoverageData."""
 
@@ -130,14 +124,16 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
         covdata.add_arcs({})
         assert not covdata
 
-    def test_adding_lines(self):
+    @pytest.mark.parametrize("lines", [LINES_1, dicts_from_sets(LINES_1)])
+    def test_adding_lines(self, lines):
         covdata = CoverageData()
-        covdata.add_lines(LINES_1)
+        covdata.add_lines(lines)
         self.assert_lines1_data(covdata)
 
-    def test_adding_arcs(self):
+    @pytest.mark.parametrize("arcs", [ARCS_3, dicts_from_sets(ARCS_3)])
+    def test_adding_arcs(self, arcs):
         covdata = CoverageData()
-        covdata.add_arcs(ARCS_3)
+        covdata.add_arcs(arcs)
         self.assert_arcs3_data(covdata)
 
     def test_ok_to_add_lines_twice(self):
@@ -212,20 +208,22 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
         covdata.add_lines(LINES_1)
         assert covdata.contexts_by_lineno('a.py') == {1: ['test_a'], 2: ['test_a']}
 
-    def test_no_duplicate_lines(self):
+    @pytest.mark.parametrize("lines", [LINES_1, dicts_from_sets(LINES_1)])
+    def test_no_duplicate_lines(self, lines):
         covdata = CoverageData()
         covdata.set_context("context1")
-        covdata.add_lines(LINES_1)
+        covdata.add_lines(lines)
         covdata.set_context("context2")
-        covdata.add_lines(LINES_1)
+        covdata.add_lines(lines)
         assert covdata.lines('a.py') == A_PY_LINES_1
 
-    def test_no_duplicate_arcs(self):
+    @pytest.mark.parametrize("arcs", [ARCS_3, dicts_from_sets(ARCS_3)])
+    def test_no_duplicate_arcs(self, arcs):
         covdata = CoverageData()
         covdata.set_context("context1")
-        covdata.add_arcs(ARCS_3)
+        covdata.add_arcs(arcs)
         covdata.set_context("context2")
-        covdata.add_arcs(ARCS_3)
+        covdata.add_arcs(arcs)
         assert covdata.arcs('x.py') == X_PY_ARCS_3
 
     def test_no_arcs_vs_unmeasured_file(self):
author	Ned Batchelder <ned@nedbatchelder.com>	2021-08-15 08:26:36 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2021-08-15 08:26:36 -0400
commit	db235732dd9a0198f6e5d00b895baa516221fee2 (patch)
tree	478b50037f9fdbef2fc570c3dbad899987518bfa
parent	f6d3e88ba5b2dab1720281885c99cdf3ce2844bc (diff)
download	python-coveragepy-git-db235732dd9a0198f6e5d00b895baa516221fee2.tar.gz