author     Ned Batchelder <ned@nedbatchelder.com>  2019-07-09 16:22:51 -0400
committer  Ned Batchelder <ned@nedbatchelder.com>  2019-07-10 06:57:24 -0400
commit     6b226d85f5191cd27b20ad27caded8b407772a02 (patch)
tree       a7d2e58246fb5ef9b8128ff342e97968c4ace853
parent     9bc6b93805a5f20a87211a315d00503eddab66dc (diff)
download   python-coveragepy-git-6b226d85f5191cd27b20ad27caded8b407772a02.tar.gz
Remove the JSON data code
-rw-r--r--  CHANGES.rst              3
-rw-r--r--  coverage/data.py       717
-rw-r--r--  coverage/misc.py        14
-rw-r--r--  coverage/sqldata.py     74
-rw-r--r--  lab/gendata.py          42
-rw-r--r--  tests/coveragetest.py    5
-rw-r--r--  tests/test_context.py    5
-rw-r--r--  tests/test_data.py     130
-rw-r--r--  tests/test_debug.py      9
-rw-r--r--  tests/test_html.py       1
10 files changed, 102 insertions, 898 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index ec5fea2d..7438771d 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -34,6 +34,9 @@ Unreleased
- Dynamic contexts with no data are no longer written to the database.
+- SQLite data storage is now faster. There's no longer a reason to keep the
+ JSON data file code, so it has been removed.
+
- Added the classmethod :meth:`Coverage.current` to get the latest started
`Coverage` instance.
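
A rough sketch of the new classmethod mentioned in the changelog entry above, assuming it tracks started instances and returns the most recently started one (None if nothing is running):

    import coverage

    cov = coverage.Coverage()
    cov.start()
    # The most recently started instance is "current".
    assert coverage.Coverage.current() is cov
    cov.stop()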
diff --git a/coverage/data.py b/coverage/data.py
index bcb418b8..82bf1d41 100644
--- a/coverage/data.py
+++ b/coverage/data.py
@@ -1,667 +1,20 @@
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
-"""Coverage data for coverage.py."""
+"""Coverage data for coverage.py.
-import collections
-import glob
-import itertools
-import json
-import optparse
-import os
-import os.path
-import random
-import re
-import socket
-
-from coverage import env
-from coverage.backward import iitems, string_class
-from coverage.debug import NoDebugging
-from coverage.files import PathAliases
-from coverage.misc import CoverageException, file_be_gone, isolate_module
-
-os = isolate_module(os)
-
-
-def filename_suffix(suffix):
- if suffix is True:
- # If data_suffix was a simple true value, then make a suffix with
- # plenty of distinguishing information. We do this here in
- # `save()` at the last minute so that the pid will be correct even
- # if the process forks.
- dice = random.Random(os.urandom(8)).randint(0, 999999)
- suffix = "%s.%s.%06d" % (socket.gethostname(), os.getpid(), dice)
- return suffix
-
-
-class CoverageJsonData(object):
- """Manages collected coverage data, including file storage.
-
- This class is the public supported API to the data coverage.py collects
- during program execution. It includes information about what code was
- executed. It does not include information from the analysis phase, to
- determine what lines could have been executed, or what lines were not
- executed.
-
- .. note::
-
- The file format is not documented or guaranteed. It will change in
- the future, in possibly complicated ways. Do not read coverage.py
- data files directly. Use this API to avoid disruption.
-
- There are a number of kinds of data that can be collected:
-
- * **lines**: the line numbers of source lines that were executed.
- These are always available.
-
- * **arcs**: pairs of source and destination line numbers for transitions
- between source lines. These are only available if branch coverage was
- used.
-
- * **file tracer names**: the module names of the file tracer plugins that
- handled each file in the data.
-
- * **run information**: information about the program execution. This is
- written during "coverage run", and then accumulated during "coverage
- combine".
-
- Lines, arcs, and file tracer names are stored for each source file. File
- names in this API are case-sensitive, even on platforms with
- case-insensitive file systems.
-
- A data file is associated with the data when the :class:`CoverageData`
- is created.
-
- To read a coverage.py data file, use :meth:`read`. You can then
- access the line, arc, or file tracer data with :meth:`lines`, :meth:`arcs`,
- or :meth:`file_tracer`. Run information is available with
- :meth:`run_infos`.
-
- The :meth:`has_arcs` method indicates whether arc data is available. You
- can get a list of the files in the data with :meth:`measured_files`.
- A summary of the line data is available from :meth:`line_counts`. As with
- most Python containers, you can determine if there is any data at all by
- using this object as a boolean value.
-
- Most data files will be created by coverage.py itself, but you can use
- methods here to create data files if you like. The :meth:`add_lines`,
- :meth:`add_arcs`, and :meth:`add_file_tracers` methods add data, in ways
- that are convenient for coverage.py. The :meth:`add_run_info` method adds
- key-value pairs to the run information.
-
- To add a source file without any measured data, use :meth:`touch_file`.
-
- Write the data to its file with :meth:`write`.
-
- You can clear the data in memory with :meth:`erase`. Two data collections
- can be combined by using :meth:`update` on one :class:`CoverageData`,
- passing it the other.
-
- """
-
- # The data file format is JSON, with these keys:
- #
- # * lines: a dict mapping file names to lists of line numbers
- # executed::
- #
- # { "file1": [17,23,45], "file2": [1,2,3], ... }
- #
- # * arcs: a dict mapping file names to lists of line number pairs::
- #
- # { "file1": [[17,23], [17,25], [25,26]], ... }
- #
- # * file_tracers: a dict mapping file names to plugin names::
- #
- # { "file1": "django.coverage", ... }
- #
- # * runs: a list of dicts of information about the coverage.py runs
- # contributing to the data::
- #
- # [ { "brief_sys": "CPython 2.7.10 Darwin" }, ... ]
- #
- # Only one of `lines` or `arcs` will be present: with branch coverage, data
- # is stored as arcs. Without branch coverage, it is stored as lines. The
- # line data is easily recovered from the arcs: it is all the first elements
- # of the pairs that are greater than zero.
-
- def __init__(self, basename=None, suffix=None, warn=None, debug=None):
- """Create a CoverageData.
-
- `warn` is the warning function to use.
-
- `basename` is the name of the file to use for storing data.
-
- `debug` is a `DebugControl` object for writing debug messages.
-
- """
- self._warn = warn
- self._debug = debug or NoDebugging()
- self.filename = os.path.abspath(basename or ".coverage")
- self.suffix = suffix
-
- # A map from canonical Python source file name to a dictionary in
- # which there's an entry for each line number that has been
- # executed:
- #
- # { 'filename1.py': [12, 47, 1001], ... }
- #
- self._lines = None
-
- # A map from canonical Python source file name to a dictionary with an
- # entry for each pair of line numbers forming an arc:
- #
- # { 'filename1.py': [(12,14), (47,48), ... ], ... }
- #
- self._arcs = None
-
- # A map from canonical source file name to a plugin module name:
- #
- # { 'filename1.py': 'django.coverage', ... }
- #
- self._file_tracers = {}
-
- # A list of dicts of information about the coverage.py runs.
- self._runs = []
-
- def __repr__(self):
- return "<{klass} lines={lines} arcs={arcs} tracers={tracers} runs={runs}>".format(
- klass=self.__class__.__name__,
- lines="None" if self._lines is None else "{{{0}}}".format(len(self._lines)),
- arcs="None" if self._arcs is None else "{{{0}}}".format(len(self._arcs)),
- tracers="{{{0}}}".format(len(self._file_tracers)),
- runs="[{0}]".format(len(self._runs)),
- )
-
- ##
- ## Reading data
- ##
-
- def set_query_contexts(self, contexts=None):
- """Set the query contexts.
-
- No-op, since contexts are not supported for this data format.
- """
- pass
-
- def has_arcs(self):
- """Does this data have arcs?
-
- Arc data is only available if branch coverage was used during
- collection.
-
- Returns a boolean.
-
- """
- return self._has_arcs()
-
- def lines(self, filename, contexts=None):
- """Get the list of lines executed for a file.
-
- If the file was not measured, returns None. A file might be measured,
- and have no lines executed, in which case an empty list is returned.
-
- If the file was executed, returns a list of integers, the line numbers
- executed in the file. The list is in no particular order.
-
- `contexts` is ignored, since contexts are not supported for this data
- format.
- """
- if self._arcs is not None:
- arcs = self._arcs.get(filename)
- if arcs is not None:
- all_lines = itertools.chain.from_iterable(arcs)
- return list(set(l for l in all_lines if l > 0))
- elif self._lines is not None:
- return self._lines.get(filename)
- return None
-
- def arcs(self, filename, contexts=None):
- """Get the list of arcs executed for a file.
-
- If the file was not measured, returns None. A file might be measured,
- and have no arcs executed, in which case an empty list is returned.
-
- If the file was executed, returns a list of 2-tuples of integers. Each
- pair is a starting line number and an ending line number for a
- transition from one line to another. The list is in no particular
- order.
-
- Negative numbers have special meaning. If the starting line number is
- -N, it represents an entry to the code object that starts at line N.
- If the ending line number is -N, it's an exit from the code object that
- starts at line N.
-
- `contexts` is ignored, since contexts are not supported for this data
- format.
- """
- if self._arcs is not None:
- if filename in self._arcs:
- return self._arcs[filename]
- return None
-
- def file_tracer(self, filename):
- """Get the plugin name of the file tracer for a file.
-
- Returns the name of the plugin that handles this file. If the file was
- measured, but didn't use a plugin, then "" is returned. If the file
- was not measured, then None is returned.
-
- """
- # Because the vast majority of files involve no plugin, we don't store
- # them explicitly in self._file_tracers. Check the measured data
- # instead to see if it was a known file with no plugin.
- if filename in (self._arcs or self._lines or {}):
- return self._file_tracers.get(filename, "")
- return None
-
- def contexts_by_lineno(self, filename):
- return collections.defaultdict(list)
-
- def run_infos(self):
- """Return the list of dicts of run information.
-
- For data collected during a single run, this will be a one-element
- list. If data has been combined, there will be one element for each
- original data file.
-
- """
- return self._runs
-
- def measured_files(self):
- """A set of all files that had been measured."""
- return set(self._arcs or self._lines or {})
-
- def __nonzero__(self):
- return bool(self._lines or self._arcs)
-
- __bool__ = __nonzero__
-
- def read(self):
- """Read the coverage data.
-
- It is fine for the file to not exist, in which case no data is read.
-
- """
- if os.path.exists(self.filename):
- self._read_file(self.filename)
-
- def _read_fileobj(self, file_obj):
- """Read the coverage data from the given file object.
-
- Should only be used on an empty CoverageData object.
-
- """
- data = self._read_raw_data(file_obj)
-
- self._lines = self._arcs = None
-
- if 'lines' in data:
- self._lines = data['lines']
- if 'arcs' in data:
- self._arcs = dict(
- (fname, [tuple(pair) for pair in arcs])
- for fname, arcs in iitems(data['arcs'])
- )
- self._file_tracers = data.get('file_tracers', {})
- self._runs = data.get('runs', [])
-
- self._validate()
-
- def _read_file(self, filename):
- """Read the coverage data from `filename` into this object."""
- if self._debug.should('dataio'):
- self._debug.write("Reading data from %r" % (filename,))
- try:
- with self._open_for_reading(filename) as f:
- self._read_fileobj(f)
- except Exception as exc:
- raise CoverageException(
- "Couldn't read data from '%s': %s: %s" % (
- filename, exc.__class__.__name__, exc,
- )
- )
-
- _GO_AWAY = "!coverage.py: This is a private format, don't read it directly!"
-
- @classmethod
- def _open_for_reading(cls, filename):
- """Open a file appropriately for reading data."""
- return open(filename, "r")
-
- @classmethod
- def _read_raw_data(cls, file_obj):
- """Read the raw data from a file object."""
- go_away = file_obj.read(len(cls._GO_AWAY))
- if go_away != cls._GO_AWAY:
- raise CoverageException("Doesn't seem to be a coverage.py data file")
- return json.load(file_obj)
-
- @classmethod
- def _read_raw_data_file(cls, filename):
- """Read the raw data from a file, for debugging."""
- with cls._open_for_reading(filename) as f:
- return cls._read_raw_data(f)
+This file had the 4.x JSON data support, which is now gone. This file still
+has storage-agnostic helpers, and is kept to avoid changing too many imports.
+CoverageData is now defined in sqldata.py, and imported here to keep the
+imports working.
- ##
- ## Writing data
- ##
-
- def add_lines(self, line_data):
- """Add measured line data.
-
- `line_data` is a dictionary mapping file names to dictionaries::
-
- { filename: { lineno: None, ... }, ...}
-
- """
- if self._debug.should('dataop'):
- self._debug.write("Adding lines: %d files, %d lines total" % (
- len(line_data), sum(len(lines) for lines in line_data.values())
- ))
- if self._has_arcs():
- raise CoverageException("Can't add lines to existing arc data")
-
- if self._lines is None:
- self._lines = {}
- for filename, linenos in iitems(line_data):
- if filename in self._lines:
- new_linenos = set(self._lines[filename])
- new_linenos.update(linenos)
- linenos = new_linenos
- self._lines[filename] = list(linenos)
-
- self._validate()
-
- def add_arcs(self, arc_data):
- """Add measured arc data.
-
- `arc_data` is a dictionary mapping file names to dictionaries::
-
- { filename: { (l1,l2): None, ... }, ...}
-
- """
- if self._debug.should('dataop'):
- self._debug.write("Adding arcs: %d files, %d arcs total" % (
- len(arc_data), sum(len(arcs) for arcs in arc_data.values())
- ))
- if self._has_lines():
- raise CoverageException("Can't add arcs to existing line data")
-
- if self._arcs is None:
- self._arcs = {}
- for filename, arcs in iitems(arc_data):
- if filename in self._arcs:
- new_arcs = set(self._arcs[filename])
- new_arcs.update(arcs)
- arcs = new_arcs
- self._arcs[filename] = list(arcs)
-
- self._validate()
-
- def add_file_tracers(self, file_tracers):
- """Add per-file plugin information.
-
- `file_tracers` is { filename: plugin_name, ... }
-
- """
- if self._debug.should('dataop'):
- self._debug.write("Adding file tracers: %d files" % (len(file_tracers),))
-
- existing_files = self._arcs or self._lines or {}
- for filename, plugin_name in iitems(file_tracers):
- if filename not in existing_files:
- raise CoverageException(
- "Can't add file tracer data for unmeasured file '%s'" % (filename,)
- )
- existing_plugin = self._file_tracers.get(filename)
- if existing_plugin is not None and plugin_name != existing_plugin:
- raise CoverageException(
- "Conflicting file tracer name for '%s': %r vs %r" % (
- filename, existing_plugin, plugin_name,
- )
- )
- self._file_tracers[filename] = plugin_name
-
- self._validate()
-
- def add_run_info(self, **kwargs):
- """Add information about the run.
-
- Keywords are arbitrary, and are stored in the run dictionary. Values
- must be JSON serializable. You may use this function more than once,
- but repeated keywords overwrite each other.
-
- """
- if self._debug.should('dataop'):
- self._debug.write("Adding run info: %r" % (kwargs,))
- if not self._runs:
- self._runs = [{}]
- self._runs[0].update(kwargs)
- self._validate()
-
- def touch_file(self, filename, plugin_name=""):
- """Ensure that `filename` appears in the data, empty if needed.
-
- `plugin_name` is the name of the plugin responsible for this file. It is used
- to associate the right filereporter, etc.
- """
- if self._debug.should('dataop'):
- self._debug.write("Touching %r" % (filename,))
- if not self._has_arcs() and not self._has_lines():
- raise CoverageException("Can't touch files in an empty CoverageData")
-
- if self._has_arcs():
- where = self._arcs
- else:
- where = self._lines
- where.setdefault(filename, [])
- if plugin_name:
- # Set the tracer for this file
- self._file_tracers[filename] = plugin_name
-
- self._validate()
-
- def set_context(self, context):
- """Set the context. Not implemented for JSON storage."""
- if context:
- raise CoverageException("JSON storage doesn't support contexts")
-
- def write(self):
- """Write the collected coverage data to a file.
-
- `suffix` is a suffix to append to the base file name. This can be used
- for multiple or parallel execution, so that many coverage data files
- can exist simultaneously. A dot will be used to join the base name and
- the suffix.
-
- """
- filename = self.filename
- suffix = filename_suffix(self.suffix)
- if suffix:
- filename += "." + suffix
- self._write_file(filename)
-
- def _write_fileobj(self, file_obj):
- """Write the coverage data to `file_obj`."""
-
- # Create the file data.
- file_data = {}
-
- if self._has_arcs():
- file_data['arcs'] = self._arcs
-
- if self._has_lines():
- file_data['lines'] = self._lines
-
- if self._file_tracers:
- file_data['file_tracers'] = self._file_tracers
-
- if self._runs:
- file_data['runs'] = self._runs
-
- # Write the data to the file.
- file_obj.write(self._GO_AWAY)
- json.dump(file_data, file_obj, separators=(',', ':'))
-
- def _write_file(self, filename):
- """Write the coverage data to `filename`."""
- if self._debug.should('dataio'):
- self._debug.write("Writing data to %r" % (filename,))
- with open(filename, 'w') as fdata:
- self._write_fileobj(fdata)
-
- def erase(self, parallel=False):
- """Erase the data in this object.
-
- If `parallel` is true, then also deletes data files created from the
- basename by parallel-mode.
-
- """
- self._lines = None
- self._arcs = None
- self._file_tracers = {}
- self._runs = []
- self._validate()
-
- if self._debug.should('dataio'):
- self._debug.write("Erasing data file %r" % (self.filename,))
- file_be_gone(self.filename)
- if parallel:
- data_dir, local = os.path.split(self.filename)
- localdot = local + '.*'
- pattern = os.path.join(os.path.abspath(data_dir), localdot)
- for filename in glob.glob(pattern):
- if self._debug.should('dataio'):
- self._debug.write("Erasing parallel data file %r" % (filename,))
- file_be_gone(filename)
-
- def update(self, other_data, aliases=None):
- """Update this data with data from another `CoverageData`.
-
- If `aliases` is provided, it's a `PathAliases` object that is used to
- re-map paths to match the local machine's.
-
- """
- if self._has_lines() and other_data._has_arcs():
- raise CoverageException("Can't combine arc data with line data")
- if self._has_arcs() and other_data._has_lines():
- raise CoverageException("Can't combine line data with arc data")
-
- aliases = aliases or PathAliases()
-
- # _file_tracers: only have a string, so they have to agree.
- # Have to do these first, so that our examination of self._arcs and
- # self._lines won't be confused by data updated from other_data.
- for filename in other_data.measured_files():
- other_plugin = other_data.file_tracer(filename)
- filename = aliases.map(filename)
- this_plugin = self.file_tracer(filename)
- if this_plugin is None:
- if other_plugin:
- self._file_tracers[filename] = other_plugin
- elif this_plugin != other_plugin:
- raise CoverageException(
- "Conflicting file tracer name for '%s': %r vs %r" % (
- filename, this_plugin, other_plugin,
- )
- )
-
- # _runs: add the new runs to these runs.
- self._runs.extend(other_data._runs)
-
- # _lines: merge dicts.
- if other_data._has_lines():
- if self._lines is None:
- self._lines = {}
- for filename, file_lines in iitems(other_data._lines):
- filename = aliases.map(filename)
- if filename in self._lines:
- lines = set(self._lines[filename])
- lines.update(file_lines)
- file_lines = list(lines)
- self._lines[filename] = file_lines
-
- # _arcs: merge dicts.
- if other_data._has_arcs():
- if self._arcs is None:
- self._arcs = {}
- for filename, file_arcs in iitems(other_data._arcs):
- filename = aliases.map(filename)
- if filename in self._arcs:
- arcs = set(self._arcs[filename])
- arcs.update(file_arcs)
- file_arcs = list(arcs)
- self._arcs[filename] = file_arcs
-
- self._validate()
-
- ##
- ## Miscellaneous
- ##
-
- def _validate(self):
- """If we are in paranoid mode, validate that everything is right."""
- if env.TESTING:
- self._validate_invariants()
-
- def _validate_invariants(self):
- """Validate internal invariants."""
- # Only one of _lines or _arcs should exist.
- assert not(self._has_lines() and self._has_arcs()), (
- "Shouldn't have both _lines and _arcs"
- )
-
- # _lines should be a dict of lists of ints.
- if self._has_lines():
- for fname, lines in iitems(self._lines):
- assert isinstance(fname, string_class), "Key in _lines shouldn't be %r" % (fname,)
- assert all(isinstance(x, int) for x in lines), (
- "_lines[%r] shouldn't be %r" % (fname, lines)
- )
-
- # _arcs should be a dict of lists of pairs of ints.
- if self._has_arcs():
- for fname, arcs in iitems(self._arcs):
- assert isinstance(fname, string_class), "Key in _arcs shouldn't be %r" % (fname,)
- assert all(isinstance(x, int) and isinstance(y, int) for x, y in arcs), (
- "_arcs[%r] shouldn't be %r" % (fname, arcs)
- )
-
- # _file_tracers should have only non-empty strings as values.
- for fname, plugin in iitems(self._file_tracers):
- assert isinstance(fname, string_class), (
- "Key in _file_tracers shouldn't be %r" % (fname,)
- )
- assert plugin and isinstance(plugin, string_class), (
- "_file_tracers[%r] shoudn't be %r" % (fname, plugin)
- )
-
- # _runs should be a list of dicts.
- for val in self._runs:
- assert isinstance(val, dict)
- for key in val:
- assert isinstance(key, string_class), "Key in _runs shouldn't be %r" % (key,)
-
- ##
- ## Internal
- ##
-
- def _has_lines(self):
- """Do we have data in self._lines?"""
- return self._lines is not None
-
- def _has_arcs(self):
- """Do we have data in self._arcs?"""
- return self._arcs is not None
+"""
+import glob
+import os.path
-# $set_env.py: COVERAGE_STORAGE - The storage implementation to use: sql (default), or json.
-STORAGE = os.environ.get("COVERAGE_STORAGE", "sql")
-if STORAGE == "json":
- CoverageData = CoverageJsonData
-elif STORAGE == "sql":
- from coverage.sqldata import CoverageSqliteData
- CoverageData = CoverageSqliteData
+from coverage.misc import CoverageException, file_be_gone
+from coverage.sqldata import CoverageData
def line_counts(data, fullpath=False):
@@ -769,53 +122,3 @@ def combine_parallel_data(data, aliases=None, data_paths=None, strict=False):
if strict and not files_combined:
raise CoverageException("No usable data files")
-
-def canonicalize_json_data(data):
- """Canonicalize our JSON data so it can be compared."""
- for fname, lines in iitems(data.get('lines', {})):
- data['lines'][fname] = sorted(lines)
- for fname, arcs in iitems(data.get('arcs', {})):
- data['arcs'][fname] = sorted(arcs)
-
-
-def pretty_data(data):
- """Format data as JSON, but as nicely as possible.
-
- Returns a string.
-
- """
- # Start with a basic JSON dump.
- out = json.dumps(data, indent=4, sort_keys=True)
- # But pairs of numbers shouldn't be split across lines...
- out = re.sub(r"\[\s+(-?\d+),\s+(-?\d+)\s+]", r"[\1, \2]", out)
- # Trailing spaces mess with tests, get rid of them.
- out = re.sub(r"(?m)\s+$", "", out)
- return out
-
-
-def debug_main(args):
- """Dump the raw data from data files.
-
- Run this as::
-
- $ python -m coverage.data [FILE]
-
- """
- parser = optparse.OptionParser()
- parser.add_option(
- "-c", "--canonical", action="store_true",
- help="Sort data into a canonical order",
- )
- options, args = parser.parse_args(args)
-
- for filename in (args or [".coverage"]):
- print("--- {0} ------------------------------".format(filename))
- data = CoverageData._read_raw_data_file(filename)
- if options.canonical:
- canonicalize_json_data(data)
- print(pretty_data(data))
-
-
-if __name__ == '__main__':
- import sys
- debug_main(sys.argv[1:])
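
For reference, a minimal sketch of reading the removed 4.x JSON format, based on the _GO_AWAY prefix and the lines/arcs keys described in the deleted code; "old.coverage" is a hypothetical file name:

    import json

    GO_AWAY = "!coverage.py: This is a private format, don't read it directly!"

    def read_old_json_data(filename):
        # Skip the warning prefix, then parse the JSON payload.
        with open(filename, "r") as f:
            if f.read(len(GO_AWAY)) != GO_AWAY:
                raise ValueError("Doesn't seem to be a coverage.py data file")
            return json.load(f)

    data = read_old_json_data("old.coverage")
    # Only one of "lines" or "arcs" is present; with branch coverage, line
    # numbers are recoverable as the positive first elements of the pairs.
    for fname, arcs in data.get("arcs", {}).items():
        print(fname, sorted(set(a for a, _ in arcs if a > 0)))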
diff --git a/coverage/misc.py b/coverage/misc.py
index ad7b834f..00e88fdb 100644
--- a/coverage/misc.py
+++ b/coverage/misc.py
@@ -9,7 +9,9 @@ import inspect
import locale
import os
import os.path
+import random
import re
+import socket
import sys
import types
@@ -175,6 +177,18 @@ def output_encoding(outfile=None):
return encoding
+def filename_suffix(suffix):
+ """Compute a filename suffix for a data file."""
+ if suffix is True:
+ # If data_suffix was a simple true value, then make a suffix with
+ # plenty of distinguishing information. We do this here in
+ # `save()` at the last minute so that the pid will be correct even
+ # if the process forks.
+ dice = random.Random(os.urandom(8)).randint(0, 999999)
+ suffix = "%s.%s.%06d" % (socket.gethostname(), os.getpid(), dice)
+ return suffix
+
+
class Hasher(object):
"""Hashes Python data into md5."""
def __init__(self):
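
A quick sketch of the relocated filename_suffix helper: the True case folds in hostname, pid, and a random component so parallel runs don't collide (the values shown are illustrative):

    from coverage.misc import filename_suffix

    print(filename_suffix("ci"))   # a plain string passes through: "ci"
    print(filename_suffix(True))   # e.g. "myhost.12345.678901"
    print(filename_suffix(None))   # None: no suffix was requested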
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index af1c837a..2a380893 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -4,8 +4,6 @@
"""Sqlite coverage data."""
# TODO: get sys_info for data class, so we can see sqlite version etc
-# TODO: get rid of skip_unless_data_storage_is
-# TODO: get rid of "JSON message" and "SQL message" in the tests
# TODO: factor out dataop debugging to a wrapper class?
# TODO: make sure all dataop debugging is in place somehow
# TODO: should writes be batched?
@@ -19,10 +17,9 @@ import sqlite3
import sys
from coverage.backward import get_thread_id, iitems
-from coverage.data import filename_suffix
from coverage.debug import NoDebugging, SimpleReprMixin
from coverage.files import PathAliases
-from coverage.misc import CoverageException, file_be_gone
+from coverage.misc import CoverageException, file_be_gone, filename_suffix
SCHEMA_VERSION = 2
@@ -82,7 +79,72 @@ CREATE TABLE tracer (
"""
-class CoverageSqliteData(SimpleReprMixin):
+class CoverageData(SimpleReprMixin):
+ """Manages collected coverage data, including file storage.
+
+ TODO: This is the 4.x docstring. Update it for 5.0.
+
+ This class is the public supported API to the data coverage.py collects
+ during program execution. It includes information about what code was
+ executed. It does not include information from the analysis phase, to
+ determine what lines could have been executed, or what lines were not
+ executed.
+
+ .. note::
+
+ The file format is not documented or guaranteed. It will change in
+ the future, in possibly complicated ways. Do not read coverage.py
+ data files directly. Use this API to avoid disruption.
+
+ There are a number of kinds of data that can be collected:
+
+ * **lines**: the line numbers of source lines that were executed.
+ These are always available.
+
+ * **arcs**: pairs of source and destination line numbers for transitions
+ between source lines. These are only available if branch coverage was
+ used.
+
+ * **file tracer names**: the module names of the file tracer plugins that
+ handled each file in the data.
+
+ * **run information**: information about the program execution. This is
+ written during "coverage run", and then accumulated during "coverage
+ combine".
+
+ Lines, arcs, and file tracer names are stored for each source file. File
+ names in this API are case-sensitive, even on platforms with
+ case-insensitive file systems.
+
+ A data file is associated with the data when the :class:`CoverageData`
+ is created.
+
+ To read a coverage.py data file, use :meth:`read`. You can then
+ access the line, arc, or file tracer data with :meth:`lines`, :meth:`arcs`,
+ or :meth:`file_tracer`. Run information is available with
+ :meth:`run_infos`.
+
+ The :meth:`has_arcs` method indicates whether arc data is available. You
+ can get a list of the files in the data with :meth:`measured_files`.
+ A summary of the line data is available from :meth:`line_counts`. As with
+ most Python containers, you can determine if there is any data at all by
+ using this object as a boolean value.
+
+ Most data files will be created by coverage.py itself, but you can use
+ methods here to create data files if you like. The :meth:`add_lines`,
+ :meth:`add_arcs`, and :meth:`add_file_tracers` methods add data, in ways
+ that are convenient for coverage.py. The :meth:`add_run_info` method adds
+ key-value pairs to the run information.
+
+ To add a source file without any measured data, use :meth:`touch_file`.
+
+ Write the data to its file with :meth:`write`.
+
+ You can clear the data in memory with :meth:`erase`. Two data collections
+ can be combined by using :meth:`update` on one :class:`CoverageData`,
+ passing it the other.
+
+ """
def __init__(self, basename=None, suffix=None, warn=None, debug=None):
self._basename = os.path.abspath(basename or ".coverage")
@@ -348,7 +410,7 @@ class CoverageSqliteData(SimpleReprMixin):
if self._debug.should('dataop'):
self._debug.write("Touching %r" % (filename,))
if not self._has_arcs and not self._has_lines:
- raise CoverageException("Can't touch files in an empty CoverageSqliteData")
+ raise CoverageException("Can't touch files in an empty CoverageData")
self._file_id(filename, add=True)
if plugin_name:
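
A small sketch of the public API from the docstring above, using only methods named there; note that SQL storage may create the data file on first use, per the comments in tests/test_data.py:

    from coverage.data import CoverageData

    covdata = CoverageData()        # defaults to ".coverage"
    covdata.read()
    if covdata:                     # boolean value: is there any data?
        for fname in covdata.measured_files():
            if covdata.has_arcs():
                print(fname, covdata.arcs(fname))
            else:
                print(fname, covdata.lines(fname))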
diff --git a/lab/gendata.py b/lab/gendata.py
deleted file mode 100644
index 27ad4fda..00000000
--- a/lab/gendata.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Run some timing tests of JsonData vs SqliteData.
-
-import random
-import time
-
-from coverage.data import CoverageJsonData
-from coverage.sqldata import CoverageSqliteData
-
-NUM_FILES = 1000
-NUM_LINES = 1000
-
-def gen_data(cdata):
- rnd = random.Random()
- rnd.seed(17)
-
- def linenos(num_lines, prob):
- return (n for n in range(num_lines) if random.random() < prob)
-
- start = time.time()
- for i in range(NUM_FILES):
- filename = "/src/foo/project/file{i}.py".format(i=i)
- line_data = { filename: dict.fromkeys(linenos(NUM_LINES, .6)) }
- cdata.add_lines(line_data)
-
- cdata.write()
- end = time.time()
- delta = end - start
- return delta
-
-class DummyData:
- def add_lines(self, line_data):
- return
- def write(self):
- return
-
-overhead = gen_data(DummyData())
-jtime = gen_data(CoverageJsonData("gendata.json")) - overhead
-stime = gen_data(CoverageSqliteData("gendata.db")) - overhead
-print("Overhead: {overhead:.3f}s".format(overhead=overhead))
-print("JSON: {jtime:.3f}s".format(jtime=jtime))
-print("SQLite: {stime:.3f}s".format(stime=stime))
-print("{slower:.3f}x slower".format(slower=stime/jtime))
diff --git a/tests/coveragetest.py b/tests/coveragetest.py
index b06db896..edb57356 100644
--- a/tests/coveragetest.py
+++ b/tests/coveragetest.py
@@ -25,7 +25,6 @@ from coverage import env
from coverage.backunittest import TestCase, unittest
from coverage.backward import StringIO, import_local_file, string_class, shlex_quote
from coverage.cmdline import CoverageScript
-from coverage.data import STORAGE
from coverage.misc import arcz_to_arcs, StopEverything
from tests.helpers import run_command, SuperModuleCleaner
@@ -102,8 +101,8 @@ class CoverageTest(
def skip_unless_data_storage_is(self, storage):
"""Skip a test for tests that are particular about the storage implementation."""
- if STORAGE != storage:
- self.skipTest("Not using {} for data storage".format(storage))
+ assert storage == "json"
+ self.skipTest("Some features haven't been implemented in SQL yet.")
def clean_local_file_imports(self):
"""Clean up the results of calls to `import_local_file`.
diff --git a/tests/test_context.py b/tests/test_context.py
index 24e17069..5d40e339 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -18,10 +18,6 @@ from tests.coveragetest import CoverageTest
class StaticContextTest(CoverageTest):
"""Tests of the static context."""
- def setUp(self):
- super(StaticContextTest, self).setUp()
- self.skip_unless_data_storage_is("sql")
-
def test_no_context(self):
self.make_file("main.py", "a = 1")
cov = coverage.Coverage()
@@ -115,7 +111,6 @@ class DynamicContextTest(CoverageTest):
if not env.C_TRACER:
self.skipTest("Only the C tracer supports dynamic contexts")
super(DynamicContextTest, self).setUp()
- self.skip_unless_data_storage_is("sql")
SOURCE = """\
def helper(lineno):
diff --git a/tests/test_data.py b/tests/test_data.py
index 4a3db93c..bc988bcb 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -4,17 +4,15 @@
"""Tests for coverage.data"""
import glob
-import json
import os
import os.path
-import re
import sqlite3
import threading
import mock
-from coverage.data import CoverageData, debug_main, canonicalize_json_data, combine_parallel_data
-from coverage.data import add_data_to_hash, line_counts, STORAGE
+from coverage.data import CoverageData, combine_parallel_data
+from coverage.data import add_data_to_hash, line_counts
from coverage.debug import DebugControlString
from coverage.files import PathAliases, canonical_filename
from coverage.misc import CoverageException
@@ -107,9 +105,6 @@ class DataTestHelpers(CoverageTest):
class CoverageDataTest(DataTestHelpers, CoverageTest):
"""Test cases for CoverageData."""
- # SQL data storage always has files on disk, even without .write().
- # We need to separate the tests so they don't clobber each other.
- run_in_temp_dir = STORAGE == "sql"
no_files_in_temp_dir = True
def test_empty_data_is_false(self):
@@ -185,7 +180,6 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
self.assert_measured_files(covdata, MEASURED_FILES_3 + ['zzz.py'])
def test_set_query_contexts(self):
- self.skip_unless_data_storage_is("sql")
covdata = CoverageData()
covdata.set_context('test_a')
covdata.add_lines(LINES_1)
@@ -202,7 +196,6 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
self.assertIsNone(covdata.lines('no_such_file.py'))
def test_lines_with_contexts(self):
- self.skip_unless_data_storage_is("sql")
covdata = CoverageData()
covdata.set_context('test_a')
covdata.add_lines(LINES_1)
@@ -211,7 +204,6 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
self.assertEqual(covdata.lines('a.py', contexts=['other*']), [])
def test_contexts_by_lineno_with_lines(self):
- self.skip_unless_data_storage_is("sql")
covdata = CoverageData()
covdata.set_context('test_a')
covdata.add_lines(LINES_1)
@@ -254,7 +246,6 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
self.assertIsNone(covdata.arcs('no_such_file.py'))
def test_arcs_with_contexts(self):
- self.skip_unless_data_storage_is("sql")
covdata = CoverageData()
covdata.set_context('test_x')
covdata.add_arcs(ARCS_3)
@@ -265,7 +256,6 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
self.assertEqual(covdata.arcs('x.py', contexts=['other*']), [])
def test_contexts_by_lineno_with_arcs(self):
- self.skip_unless_data_storage_is("sql")
covdata = CoverageData()
covdata.set_context('test_x')
covdata.add_arcs(ARCS_3)
@@ -274,7 +264,6 @@ class CoverageDataTest(DataTestHelpers, CoverageTest):
{-1: ['test_x'], 1: ['test_x'], 2: ['test_x'], 3: ['test_x']})
def test_contexts_by_lineno_with_unknown_file(self):
- self.skip_unless_data_storage_is("sql")
covdata = CoverageData()
self.assertDictEqual(
covdata.contexts_by_lineno('xyz.py'), {})
@@ -562,17 +551,7 @@ class CoverageDataTestInTempDir(DataTestHelpers, CoverageTest):
covdata.read()
self.assertFalse(covdata)
- def test_read_json_errors(self):
- self.skip_unless_data_storage_is("json")
- self.make_file("misleading.dat", CoverageData._GO_AWAY + " this isn't JSON")
- msg = r"Couldn't .* '.*[/\\]{0}': \S+"
- with self.assertRaisesRegex(CoverageException, msg.format("misleading.dat")):
- covdata = CoverageData("misleading.dat")
- covdata.read()
- self.assertFalse(covdata)
-
def test_read_sql_errors(self):
- self.skip_unless_data_storage_is("sql")
with sqlite3.connect("wrong_schema.db") as con:
con.execute("create table coverage_schema (version integer)")
con.execute("insert into coverage_schema (version) values (99)")
@@ -590,51 +569,6 @@ class CoverageDataTestInTempDir(DataTestHelpers, CoverageTest):
covdata.read()
self.assertFalse(covdata)
- def test_debug_main(self):
- self.skip_unless_data_storage_is("json")
- covdata1 = CoverageData(".coverage")
- covdata1.add_lines(LINES_1)
- covdata1.write()
- debug_main([])
-
- covdata2 = CoverageData("arcs.dat")
- covdata2.add_arcs(ARCS_3)
- covdata2.add_file_tracers({"y.py": "magic_plugin"})
- covdata2.add_run_info(version="v3.14", chunks=["z", "a"])
- covdata2.write()
-
- covdata3 = CoverageData("empty.dat")
- covdata3.write()
- debug_main(["arcs.dat", "empty.dat"])
-
- expected = {
- ".coverage": {
- "lines": {
- "a.py": [1, 2],
- "b.py": [3],
- },
- },
- "arcs.dat": {
- "arcs": {
- "x.py": [[-1, 1], [1, 2], [2, 3], [3, -1]],
- "y.py": [[-1, 17], [17, 23], [23, -1]],
- },
- "file_tracers": {"y.py": "magic_plugin"},
- "runs": [
- {
- "chunks": ["z", "a"],
- "version": "v3.14",
- },
- ],
- },
- "empty.dat": {},
- }
- pieces = re.split(r"(?m)-+ ([\w.]+) -+$", self.stdout())
- for name, json_out in zip(pieces[1::2], pieces[2::2]):
- json_got = json.loads(json_out)
- canonicalize_json_data(json_got)
- self.assertEqual(expected[name], json_got)
-
class CoverageDataFilesTest(DataTestHelpers, CoverageTest):
"""Tests of CoverageData file handling."""
@@ -670,14 +604,9 @@ class CoverageDataFilesTest(DataTestHelpers, CoverageTest):
self.assertRegex(
debug.get_output(),
- r"(" # JSON output:
- r"^Writing data to '.*\.coverage'\n"
- r"Reading data from '.*\.coverage'\n$"
- r"|" # SQL output:
- r"Erasing data file '.*\.coverage'\n"
+ r"^Erasing data file '.*\.coverage'\n"
r"Creating data file '.*\.coverage'\n"
r"Opening data file '.*\.coverage'\n$"
- r")"
)
def test_debug_output_without_debug_option(self):
@@ -766,59 +695,6 @@ class CoverageDataFilesTest(DataTestHelpers, CoverageTest):
self.assert_file_count("datafile.*", 0)
self.assert_exists(".coverage")
- def read_json_data_file(self, fname):
- """Read a JSON data file for testing the JSON directly."""
- self.skip_unless_data_storage_is("json")
- with open(fname, 'r') as fdata:
- go_away = fdata.read(len(CoverageData._GO_AWAY))
- self.assertEqual(go_away, CoverageData._GO_AWAY)
- return json.load(fdata)
-
- def test_file_format(self):
- # Write with CoverageData, then read the JSON explicitly.
- covdata = CoverageData()
- covdata.add_lines(LINES_1)
- covdata.write()
-
- data = self.read_json_data_file(".coverage")
-
- lines = data['lines']
- self.assertCountEqual(lines.keys(), MEASURED_FILES_1)
- self.assertCountEqual(lines['a.py'], A_PY_LINES_1)
- self.assertCountEqual(lines['b.py'], B_PY_LINES_1)
- # If not measuring branches, there's no arcs entry.
- self.assertNotIn('arcs', data)
- # If no file tracers were involved, there's no file_tracers entry.
- self.assertNotIn('file_tracers', data)
-
- def test_file_format_with_arcs(self):
- # Write with CoverageData, then read the JSON explicitly.
- covdata = CoverageData()
- covdata.add_arcs(ARCS_3)
- covdata.write()
-
- data = self.read_json_data_file(".coverage")
-
- self.assertNotIn('lines', data)
- arcs = data['arcs']
- self.assertCountEqual(arcs.keys(), MEASURED_FILES_3)
- self.assertCountEqual(arcs['x.py'], map(list, X_PY_ARCS_3))
- self.assertCountEqual(arcs['y.py'], map(list, Y_PY_ARCS_3))
- # If no file tracers were involved, there's no file_tracers entry.
- self.assertNotIn('file_tracers', data)
-
- def test_writing_to_other_file(self):
- self.skipTest("This will be deleted!") # TODO
- covdata = CoverageData(".otherfile")
- covdata.add_lines(LINES_1)
- covdata.write()
- self.assert_doesnt_exist(".coverage")
- self.assert_exists(".otherfile")
-
- covdata.write(suffix="extra")
- self.assert_exists(".otherfile.extra")
- self.assert_doesnt_exist(".coverage")
-
def test_combining_with_aliases(self):
covdata1 = CoverageData(suffix='1')
covdata1.add_lines({
diff --git a/tests/test_debug.py b/tests/test_debug.py
index 351ef919..7d4c0a16 100644
--- a/tests/test_debug.py
+++ b/tests/test_debug.py
@@ -141,13 +141,8 @@ class DebugTraceTest(CoverageTest):
# The details of what to expect on the stack are empirical, and can change
# as the code changes. This test is here to ensure that the debug code
# continues working. It's ok to adjust these details over time.
- from coverage.data import STORAGE
- if STORAGE == "json":
- self.assertRegex(real_messages[-1], r"^\s*\d+\.\w{4}: Writing data")
- self.assertRegex(last_line, r"\s+_write_file : .*coverage[/\\]data.py @\d+$")
- else:
- self.assertRegex(real_messages[-1], r"^\s*\d+\.\w{4}: Adding file tracers: 0 files")
- self.assertRegex(last_line, r"\s+add_file_tracers : .*coverage[/\\]sqldata.py @\d+$")
+ self.assertRegex(real_messages[-1], r"^\s*\d+\.\w{4}: Adding file tracers: 0 files")
+ self.assertRegex(last_line, r"\s+add_file_tracers : .*coverage[/\\]sqldata.py @\d+$")
def test_debug_config(self):
out_lines = self.f1_debug_output(["config"])
diff --git a/tests/test_html.py b/tests/test_html.py
index 8e9430ce..3e567113 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -1050,7 +1050,6 @@ class HtmlWithContextsTest(HtmlTestHelpers, CoverageTest):
if not env.C_TRACER:
self.skipTest("Only the C tracer supports dynamic contexts")
super(HtmlWithContextsTest, self).setUp()
- self.skip_unless_data_storage_is("sql")
SOURCE = """\
def helper(lineno):