path: root/pylint/checkers/raw_metrics.py
blob: 5846cd1029b402db908ec65d397889db5064ed26
# Copyright (c) 2003-2013 LOGILAB S.A. (Paris, FRANCE).
# http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" Copyright (c) 2003-2010 LOGILAB S.A. (Paris, FRANCE).
 http://www.logilab.fr/ -- mailto:contact@logilab.fr

Raw metrics checker
"""

import tokenize

# pylint now requires python >= 2.2, so this check is no longer necessary
#if not hasattr(tokenize, 'NL'):
#    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

from pylint.interfaces import ITokenChecker
from pylint.utils import EmptyReport
from pylint.checkers import BaseTokenChecker
from pylint.reporters import diff_string
from pylint.reporters.ureports.nodes import Table


def report_raw_stats(sect, stats, old_stats):
    """calculate percentage of code / doc / comment / empty
    """
    total_lines = stats['total_lines']
    if not total_lines:
        raise EmptyReport()
    sect.description = '%s lines have been analyzed' % total_lines
    lines = ('type', 'number', '%', 'previous', 'difference')
    for node_type in ('code', 'docstring', 'comment', 'empty'):
        key = node_type + '_lines'
        total = stats[key]
        percent = float(total * 100) / total_lines
        old = old_stats.get(key, None)
        if old is not None:
            diff_str = diff_string(old, total)
        else:
            old, diff_str = 'NC', 'NC'
        lines += (node_type, str(total), '%.2f' % percent,
                  str(old), diff_str)
    sect.append(Table(children=lines, cols=5, rheaders=1))
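
# Worked example (hypothetical numbers, not from the original module): with
#     stats = {'total_lines': 100, 'code_lines': 60, 'docstring_lines': 10,
#              'comment_lines': 10, 'empty_lines': 20}
# and an empty old_stats, the flat `lines` tuple built above is rendered by
# Table(cols=5) as:
#
#     type       number  %      previous  difference
#     code       60      60.00   NC        NC
#     docstring  10      10.00   NC        NC
#     comment    10      10.00   NC        NC
#     empty      20      20.00   NC        NC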


class RawMetricsChecker(BaseTokenChecker):
    """does not check anything but gives some raw metrics :
    * total number of lines
    * total number of code lines
    * total number of docstring lines
    * total number of comments lines
    * total number of empty lines
    """

    __implements__ = (ITokenChecker,)

    # configuration section name
    name = 'metrics'
    # configuration options
    options = ()
    # messages
    msgs = {}
    # reports
    reports = (('RP0701', 'Raw metrics', report_raw_stats),)

    def __init__(self, linter):
        BaseTokenChecker.__init__(self, linter)
        self.stats = None

    def open(self):
        """init statistics"""
        self.stats = self.linter.add_stats(total_lines=0, code_lines=0,
                                           empty_lines=0, docstring_lines=0,
                                           comment_lines=0)

    def process_tokens(self, tokens):
        """update stats"""
        i = 0
        tokens = list(tokens)
        while i < len(tokens):
            i, lines_number, line_type = get_type(tokens, i)
            self.stats['total_lines'] += lines_number
            self.stats[line_type] += lines_number


# token types that never, by themselves, determine a line's type
JUNK = (tokenize.NL, tokenize.INDENT, tokenize.NEWLINE, tokenize.ENDMARKER)

def get_type(tokens, start_index):
    """return the line type : docstring, comment, code, empty"""
    i = start_index
    tok_type = tokens[i][0]
    start = tokens[i][2]
    pos = start
    line_type = None
    while i < len(tokens) and tokens[i][2][0] == start[0]:
        tok_type = tokens[i][0]
        pos = tokens[i][3]
        if line_type is None:
            if tok_type == tokenize.STRING:
                line_type = 'docstring_lines'
            elif tok_type == tokenize.COMMENT:
                line_type = 'comment_lines'
            elif tok_type in JUNK:
                pass
            else:
                line_type = 'code_lines'
        i += 1
    if line_type is None:
        line_type = 'empty_lines'
    elif i < len(tokens) and tokens[i][0] == tokenize.NEWLINE:
        i += 1
    return i, pos[0] - start[0] + 1, line_type
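
# For illustration (hypothetical input, not part of the original module):
# given tokens = list(tokenize.generate_tokens(...)) for the two lines
# '# note\nx = 1\n', get_type(tokens, 0) consumes the comment line and
# returns (next_index, 1, 'comment_lines'); calling get_type(tokens,
# next_index) then classifies 'x = 1' as (.., 1, 'code_lines').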


def register(linter):
    """ required method to auto register this checker """
    linter.register_checker(RawMetricsChecker(linter))
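

if __name__ == '__main__':
    # Minimal demonstration sketch, not part of the original checker: run the
    # module directly to classify each physical line of this very file with
    # get_type() and print the aggregate counts.
    with open(__file__) as stream:
        all_tokens = list(tokenize.generate_tokens(stream.readline))
    counts = {'code_lines': 0, 'docstring_lines': 0,
              'comment_lines': 0, 'empty_lines': 0}
    index = 0
    while index < len(all_tokens):
        index, nb_lines, line_type = get_type(all_tokens, index)
        counts[line_type] += nb_lines
    print(counts)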