summaryrefslogtreecommitdiff
path: root/coverage/python.py
blob: 98b0d6ab914cca825825d21af370ce0c84c2f52e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt

"""Python source expertise for coverage.py"""

from __future__ import annotations

import os.path
import types
import zipimport

from typing import cast, Dict, Iterable, Optional, Set, TYPE_CHECKING

from coverage import env
from coverage.exceptions import CoverageException, NoSource
from coverage.files import canonical_filename, relative_filename, zip_location
from coverage.misc import expensive, isolate_module, join_regex
from coverage.parser import PythonParser
from coverage.phystokens import source_token_lines, source_encoding
from coverage.plugin import FileReporter
from coverage.types import TArc, TLineNo, TMorf, TSourceTokenLines

if TYPE_CHECKING:
    from coverage import Coverage

os = isolate_module(os)


def read_python_source(filename: str) -> bytes:
    """Read the Python source text from `filename`.

    Returns bytes.

    """
    with open(filename, "rb") as f:
        source = f.read()

    return source.replace(b"\r\n", b"\n").replace(b"\r", b"\n")


def get_python_source(filename: str) -> str:
    """Return the source code, as unicode."""
    base, ext = os.path.splitext(filename)
    if ext == ".py" and env.WINDOWS:
        exts = [".py", ".pyw"]
    else:
        exts = [ext]

    source_bytes: Optional[bytes]
    for ext in exts:
        try_filename = base + ext
        if os.path.exists(try_filename):
            # A regular text file: open it.
            source_bytes = read_python_source(try_filename)
            break

        # Maybe it's in a zip file?
        source_bytes = get_zip_bytes(try_filename)
        if source_bytes is not None:
            break
    else:
        # Couldn't find source.
        raise NoSource(f"No source for code: '{filename}'.")

    # Replace \f because of http://bugs.python.org/issue19035
    source_bytes = source_bytes.replace(b'\f', b' ')
    source = source_bytes.decode(source_encoding(source_bytes), "replace")

    # Python code should always end with a line with a newline.
    if source and source[-1] != '\n':
        source += '\n'

    return source


def get_zip_bytes(filename: str) -> Optional[bytes]:
    """Get data from `filename` if it is a zip file path.

    Returns the bytestring data read from the zip file, or None if no zip file
    could be found or `filename` isn't in it.  The data returned will be
    an empty string if the file is empty.

    """
    zipfile_inner = zip_location(filename)
    if zipfile_inner is not None:
        zipfile, inner = zipfile_inner
        try:
            zi = zipimport.zipimporter(zipfile)
        except zipimport.ZipImportError:
            return None
        try:
            # typeshed is wrong for get_data: https://github.com/python/typeshed/pull/9428
            data = cast(bytes, zi.get_data(inner))
        except OSError:
            return None
        return data
    return None


def source_for_file(filename: str) -> str:
    """Return the source filename for `filename`.

    Given a file name being traced, return the best guess as to the source
    file to attribute it to.

    """
    if filename.endswith(".py"):
        # .py files are themselves source files.
        return filename

    elif filename.endswith((".pyc", ".pyo")):
        # Bytecode files probably have source files near them.
        py_filename = filename[:-1]
        if os.path.exists(py_filename):
            # Found a .py file, use that.
            return py_filename
        if env.WINDOWS:
            # On Windows, it could be a .pyw file.
            pyw_filename = py_filename + "w"
            if os.path.exists(pyw_filename):
                return pyw_filename
        # Didn't find source, but it's probably the .py file we want.
        return py_filename

    # No idea, just use the file name as-is.
    return filename


def source_for_morf(morf: TMorf) -> str:
    """Get the source filename for the module-or-file `morf`."""
    if hasattr(morf, '__file__') and morf.__file__:
        filename = morf.__file__
    elif isinstance(morf, types.ModuleType):
        # A module should have had .__file__, otherwise we can't use it.
        # This could be a PEP-420 namespace package.
        raise CoverageException(f"Module {morf} has no file")
    else:
        filename = morf

    filename = source_for_file(filename)
    return filename


class PythonFileReporter(FileReporter):
    """Report support for a Python file."""

    def __init__(self, morf: TMorf, coverage: Optional[Coverage] = None) -> None:
        self.coverage = coverage

        filename = source_for_morf(morf)

        fname = filename
        canonicalize = True
        if self.coverage is not None:
            if self.coverage.config.relative_files:
                canonicalize = False
        if canonicalize:
            fname = canonical_filename(filename)
        super().__init__(fname)

        if hasattr(morf, '__name__'):
            name = morf.__name__.replace(".", os.sep)
            if os.path.basename(filename).startswith('__init__.'):
                name += os.sep + "__init__"
            name += ".py"
        else:
            name = relative_filename(filename)
        self.relname = name

        self._source: Optional[str] = None
        self._parser: Optional[PythonParser] = None
        self._excluded = None

    def __repr__(self) -> str:
        return f"<PythonFileReporter {self.filename!r}>"

    def relative_filename(self) -> str:
        return self.relname

    @property
    def parser(self) -> PythonParser:
        """Lazily create a :class:`PythonParser`."""
        assert self.coverage is not None
        if self._parser is None:
            self._parser = PythonParser(
                filename=self.filename,
                exclude=self.coverage._exclude_regex('exclude'),
            )
            self._parser.parse_source()
        return self._parser

    def lines(self) -> Set[TLineNo]:
        """Return the line numbers of statements in the file."""
        return self.parser.statements

    def excluded_lines(self) -> Set[TLineNo]:
        """Return the line numbers of statements in the file."""
        return self.parser.excluded

    def translate_lines(self, lines: Iterable[TLineNo]) -> Set[TLineNo]:
        return self.parser.translate_lines(lines)

    def translate_arcs(self, arcs: Iterable[TArc]) -> Set[TArc]:
        return self.parser.translate_arcs(arcs)

    @expensive
    def no_branch_lines(self) -> Set[TLineNo]:
        assert self.coverage is not None
        no_branch = self.parser.lines_matching(
            join_regex(self.coverage.config.partial_list),
            join_regex(self.coverage.config.partial_always_list),
        )
        return no_branch

    @expensive
    def arcs(self) -> Set[TArc]:
        return self.parser.arcs()

    @expensive
    def exit_counts(self) -> Dict[TLineNo, int]:
        return self.parser.exit_counts()

    def missing_arc_description(
        self,
        start: TLineNo,
        end: TLineNo,
        executed_arcs: Optional[Iterable[TArc]] = None,
    ) -> str:
        return self.parser.missing_arc_description(start, end, executed_arcs)

    def source(self) -> str:
        if self._source is None:
            self._source = get_python_source(self.filename)
        return self._source

    def should_be_python(self) -> bool:
        """Does it seem like this file should contain Python?

        This is used to decide if a file reported as part of the execution of
        a program was really likely to have contained Python in the first
        place.

        """
        # Get the file extension.
        _, ext = os.path.splitext(self.filename)

        # Anything named *.py* should be Python.
        if ext.startswith('.py'):
            return True
        # A file with no extension should be Python.
        if not ext:
            return True
        # Everything else is probably not Python.
        return False

    def source_token_lines(self) -> TSourceTokenLines:
        return source_token_lines(self.source())