From 532be9afa75bd090a389db0f172c47ba9d37b411 Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Wed, 29 Dec 2021 11:13:27 +0100 Subject: Add caching to bottlenecks in the message store (#5605) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some functions can't be cached without impacting the correctness with the current design. Co-authored-by: Daniël van Noord <13665637+DanielNoord@users.noreply.github.com> --- pylint/lint/pylinter.py | 17 +++++++++++++---- pylint/message/message_definition_store.py | 9 ++++++++- pylint/message/message_id_store.py | 18 ++++++++++++------ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/pylint/lint/pylinter.py b/pylint/lint/pylinter.py index ec276d42d..1d6b5a3b1 100644 --- a/pylint/lint/pylinter.py +++ b/pylint/lint/pylinter.py @@ -1389,7 +1389,11 @@ class PyLinter( return None def _is_one_message_enabled(self, msgid: str, line: Optional[int]) -> bool: - """Checks state of a single message""" + """Checks state of a single message for the current file + + This function can't be cached as it depends on self.file_state which can + change. + """ if line is None: return self._msgs_state.get(msgid, True) try: @@ -1426,10 +1430,15 @@ class PyLinter( line: Optional[int] = None, confidence: Optional[interfaces.Confidence] = None, ) -> bool: - """return whether the message associated to the given message id is - enabled + """Return whether this message is enabled for the current file, line and confidence level. + + This function can't be cached right now as the line is the line of + the currently analysed file (self.file_state), if it changes, then the + result for the same msg_descr/line might need to change. - msgid may be either a numeric or symbolic message id. 
+ :param msg_descr: Either the msgid or the symbol for a MessageDefinition + :param line: The line of the currently analysed file + :param confidence: The confidence of the message """ if self.config.confidence and confidence: if confidence.name not in self.config.confidence: diff --git a/pylint/message/message_definition_store.py b/pylint/message/message_definition_store.py index c160a85ba..766cdd446 100644 --- a/pylint/message/message_definition_store.py +++ b/pylint/message/message_definition_store.py @@ -2,6 +2,7 @@ # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE import collections +import functools from typing import TYPE_CHECKING, Dict, List, Tuple, ValuesView from pylint.exceptions import UnknownMessageError @@ -46,8 +47,14 @@ class MessageDefinitionStore: self._messages_definitions[message.msgid] = message self._msgs_by_category[message.msgid[0]].append(message.msgid) + @functools.lru_cache() def get_message_definitions(self, msgid_or_symbol: str) -> List[MessageDefinition]: - """Returns the Message definition for either a numeric or symbolic id.""" + """Returns the Message definition for either a numeric or symbolic id. + + The cache has no limit as its size will likely stay minimal. For each message we store + about 1000 characters, so even if we would have 1000 messages the cache would only + take up ~= 1 Mb. 
+ """ return [ self._messages_definitions[m] for m in self.message_id_store.get_active_msgids(msgid_or_symbol) diff --git a/pylint/message/message_id_store.py b/pylint/message/message_id_store.py index 1fbe68471..a16d12bfe 100644 --- a/pylint/message/message_id_store.py +++ b/pylint/message/message_id_store.py @@ -1,5 +1,6 @@ # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE +import functools from typing import Dict, List, NoReturn, Optional, Tuple from pylint.exceptions import InvalidMessageError, UnknownMessageError @@ -101,18 +102,23 @@ class MessageIdStore: ) raise InvalidMessageError(error_message) + @functools.lru_cache() def get_active_msgids(self, msgid_or_symbol: str) -> List[str]: - """Return msgids but the input can be a symbol.""" - # Only msgid can have a digit as second letter - is_msgid: bool = msgid_or_symbol[1:].isdigit() - msgid = None - if is_msgid: + """Return msgids but the input can be a symbol. + + The cache has no limit as its size will likely stay minimal. For each message we store + about 1000 characters, so even if we would have 1000 messages the cache would only + take up ~= 1 Mb. + """ + msgid: Optional[str] + if msgid_or_symbol[1:].isdigit(): + # Only msgid can have a digit as second letter msgid = msgid_or_symbol.upper() symbol = self.__msgid_to_symbol.get(msgid) else: msgid = self.__symbol_to_msgid.get(msgid_or_symbol) symbol = msgid_or_symbol - if msgid is None or symbol is None or not msgid or not symbol: + if not msgid or not symbol: error_msg = f"No such message id or symbol '{msgid_or_symbol}'." raise UnknownMessageError(error_msg) return self.__old_names.get(msgid, [msgid]) -- cgit v1.2.1