summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Pierre Sassoulas <pierre.sassoulas@gmail.com> 2021-12-29 11:13:27 +0100
committer GitHub <noreply@github.com> 2021-12-29 11:13:27 +0100
commit 532be9afa75bd090a389db0f172c47ba9d37b411 (patch)
tree bdfa28e5be2234d2d9c04ee49094408069f45dd0
parent e815843293adf100662fe4e183c34f3040529a15 (diff)
download pylint-git-532be9afa75bd090a389db0f172c47ba9d37b411.tar.gz
Add caching to bottlenecks in the message store (#5605)
Some functions can't be cached without impacting the correctness with the current design.
Co-authored-by: Daniël van Noord <13665637+DanielNoord@users.noreply.github.com>
-rw-r--r-- pylint/lint/pylinter.py 17
-rw-r--r-- pylint/message/message_definition_store.py 9
-rw-r--r-- pylint/message/message_id_store.py 18
3 files changed, 33 insertions, 11 deletions
diff --git a/pylint/lint/pylinter.py b/pylint/lint/pylinter.py
index ec276d42d..1d6b5a3b1 100644
--- a/pylint/lint/pylinter.py
+++ b/pylint/lint/pylinter.py
@@ -1389,7 +1389,11 @@ class PyLinter(
return None
def _is_one_message_enabled(self, msgid: str, line: Optional[int]) -> bool:
- """Checks state of a single message"""
+ """Checks state of a single message for the current file
+
+ This function can't be cached as it depends on self.file_state which can
+ change.
+ """
if line is None:
return self._msgs_state.get(msgid, True)
try:
@@ -1426,10 +1430,15 @@ class PyLinter(
line: Optional[int] = None,
confidence: Optional[interfaces.Confidence] = None,
) -> bool:
- """return whether the message associated to the given message id is
- enabled
+ """Return whether this message is enabled for the current file, line and confidence level.
+
+ This function can't be cached right now as the line is the line of
+ the currently analysed file (self.file_state), if it changes, then the
+ result for the same msg_descr/line might need to change.
- msgid may be either a numeric or symbolic message id.
+ :param msg_descr: Either the msgid or the symbol for a MessageDefinition
+ :param line: The line of the currently analysed file
+ :param confidence: The confidence of the message
"""
if self.config.confidence and confidence:
if confidence.name not in self.config.confidence:
diff --git a/pylint/message/message_definition_store.py b/pylint/message/message_definition_store.py
index c160a85ba..766cdd446 100644
--- a/pylint/message/message_definition_store.py
+++ b/pylint/message/message_definition_store.py
@@ -2,6 +2,7 @@
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
import collections
+import functools
from typing import TYPE_CHECKING, Dict, List, Tuple, ValuesView
from pylint.exceptions import UnknownMessageError
@@ -46,8 +47,14 @@ class MessageDefinitionStore:
self._messages_definitions[message.msgid] = message
self._msgs_by_category[message.msgid[0]].append(message.msgid)
+ @functools.lru_cache()
def get_message_definitions(self, msgid_or_symbol: str) -> List[MessageDefinition]:
- """Returns the Message definition for either a numeric or symbolic id."""
+ """Returns the Message definition for either a numeric or symbolic id.
+
+ The cache has no limit as its size will likely stay minimal. For each message we store
+ about 1000 characters, so even if we would have 1000 messages the cache would only
+ take up ~= 1 Mb.
+ """
return [
self._messages_definitions[m]
for m in self.message_id_store.get_active_msgids(msgid_or_symbol)
diff --git a/pylint/message/message_id_store.py b/pylint/message/message_id_store.py
index 1fbe68471..a16d12bfe 100644
--- a/pylint/message/message_id_store.py
+++ b/pylint/message/message_id_store.py
@@ -1,5 +1,6 @@
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
+import functools
from typing import Dict, List, NoReturn, Optional, Tuple
from pylint.exceptions import InvalidMessageError, UnknownMessageError
@@ -101,18 +102,23 @@ class MessageIdStore:
)
raise InvalidMessageError(error_message)
+ @functools.lru_cache()
def get_active_msgids(self, msgid_or_symbol: str) -> List[str]:
- """Return msgids but the input can be a symbol."""
- # Only msgid can have a digit as second letter
- is_msgid: bool = msgid_or_symbol[1:].isdigit()
- msgid = None
- if is_msgid:
+ """Return msgids but the input can be a symbol.
+
+ The cache has no limit as its size will likely stay minimal. For each message we store
+ about 1000 characters, so even if we would have 1000 messages the cache would only
+ take up ~= 1 Mb.
+ """
+ msgid: Optional[str]
+ if msgid_or_symbol[1:].isdigit():
+ # Only msgid can have a digit as second letter
msgid = msgid_or_symbol.upper()
symbol = self.__msgid_to_symbol.get(msgid)
else:
msgid = self.__symbol_to_msgid.get(msgid_or_symbol)
symbol = msgid_or_symbol
- if msgid is None or symbol is None or not msgid or not symbol:
+ if not msgid or not symbol:
error_msg = f"No such message id or symbol '{msgid_or_symbol}'."
raise UnknownMessageError(error_msg)
return self.__old_names.get(msgid, [msgid])