diff options
author | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2019-02-02 19:21:39 +0000 |
---|---|---|
committer | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2019-02-02 19:29:20 +0000 |
commit | 35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b (patch) | |
tree | 88669b2a2413bfc04ec68f7dca646e83c3e68643 | |
parent | 805527fcd6bedc2671a37c59299c1fa9148f9e6e (diff) | |
download | psycopg2-35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b.tar.gz |
Use a proper LRU cache for namedtuples [branch: fast-namedtuple]
The previous cache didn't refresh entries on last use: it evicted them in
insertion order. Use the stdlib version on Python 3 and one of our own on
Python 2.
Max size set to 512, which should be fine for everyone (tweaking is
still possible by monkeypatching, as the tests do, but I don't want to
make an interface of it).
-rw-r--r-- | lib/_lru_cache.py | 104 | ||||
-rw-r--r-- | lib/compat.py | 5 | ||||
-rw-r--r-- | lib/extras.py | 26 | ||||
-rwxr-xr-x | tests/test_extras_dictcursor.py | 29 |
4 files changed, 138 insertions, 26 deletions
diff --git a/lib/_lru_cache.py b/lib/_lru_cache.py new file mode 100644 index 0000000..1e2c52d --- /dev/null +++ b/lib/_lru_cache.py @@ -0,0 +1,104 @@ +""" +LRU cache implementation for Python 2.7 + +Ported from http://code.activestate.com/recipes/578078/ and simplified for our +use (only support maxsize > 0 and positional arguments). +""" + +from collections import namedtuple +from functools import update_wrapper +from threading import RLock + +_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) + + +def lru_cache(maxsize=100): + """Least-recently-used cache decorator. + + Arguments to the cached function must be hashable. + + See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + """ + def decorating_function(user_function): + + cache = dict() + stats = [0, 0] # make statistics updateable non-locally + HITS, MISSES = 0, 1 # names for the stats fields + cache_get = cache.get # bound method to lookup key or return None + _len = len # localize the global len() function + lock = RLock() # linkedlist updates aren't threadsafe + root = [] # root of the circular doubly linked list + root[:] = [root, root, None, None] # initialize by pointing to self + nonlocal_root = [root] # make updateable non-locally + PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields + + assert maxsize and maxsize > 0, "maxsize %s not supported" % maxsize + + def wrapper(*args): + # size limited caching that tracks accesses by recency + key = args + with lock: + link = cache_get(key) + if link is not None: + # record recent use of the key by moving it to the + # front of the list + root, = nonlocal_root + link_prev, link_next, key, result = link + link_prev[NEXT] = link_next + link_next[PREV] = link_prev + last = root[PREV] + last[NEXT] = root[PREV] = link + link[PREV] = last + link[NEXT] = root + stats[HITS] += 1 + return result + result = user_function(*args) + with lock: + root, = nonlocal_root + if key in cache: + # getting here 
means that this same key was added to the + # cache while the lock was released. since the link + # update is already done, we need only return the + # computed result and update the count of misses. + pass + elif _len(cache) >= maxsize: + # use the old root to store the new key and result + oldroot = root + oldroot[KEY] = key + oldroot[RESULT] = result + # empty the oldest link and make it the new root + root = nonlocal_root[0] = oldroot[NEXT] + oldkey = root[KEY] + # oldvalue = root[RESULT] + root[KEY] = root[RESULT] = None + # now update the cache dictionary for the new links + del cache[oldkey] + cache[key] = oldroot + else: + # put result in a new link at the front of the list + last = root[PREV] + link = [last, root, key, result] + last[NEXT] = root[PREV] = cache[key] = link + stats[MISSES] += 1 + return result + + def cache_info(): + """Report cache statistics""" + with lock: + return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) + + def cache_clear(): + """Clear the cache and cache statistics""" + with lock: + cache.clear() + root = nonlocal_root[0] + root[:] = [root, root, None, None] + stats[:] = [0, 0] + + wrapper.__wrapped__ = user_function + wrapper.cache_info = cache_info + wrapper.cache_clear = cache_clear + return update_wrapper(wrapper, user_function) + + return decorating_function diff --git a/lib/compat.py b/lib/compat.py index cfd5a88..185b8f6 100644 --- a/lib/compat.py +++ b/lib/compat.py @@ -1,10 +1,15 @@ import sys +__all__ = ['string_types', 'text_type', 'lru_cache'] + if sys.version_info[0] == 2: # Python 2 string_types = basestring, text_type = unicode + from ._lru_cache import lru_cache + else: # Python 3 string_types = str, text_type = str + from functools import lru_cache diff --git a/lib/extras.py b/lib/extras.py index a4012e2..d73c5b2 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -35,9 +35,10 @@ import logging as _logging import psycopg2 from psycopg2 import extensions as _ext -from psycopg2.extensions import cursor 
as _cursor -from psycopg2.extensions import connection as _connection -from psycopg2.extensions import adapt as _A, quote_ident +from .extensions import cursor as _cursor +from .extensions import connection as _connection +from .extensions import adapt as _A, quote_ident +from .compat import lru_cache from psycopg2._psycopg import ( # noqa REPLICATION_PHYSICAL, REPLICATION_LOGICAL, @@ -386,28 +387,27 @@ class NamedTupleCursor(_cursor): _re_clean = _re.compile( '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']') - _nt_cache = OrderedDict() - def _make_nt(self): - key = tuple(d[0] for d in (self.description or ())) - nt = self._nt_cache.get(key) - if nt is not None: - return nt + key = tuple(d[0] for d in self.description) if self.description else () + return self._cached_make_nt(key) + def _do_make_nt(self, key): fields = [] for s in key: s = self._re_clean.sub('_', s) + # Python identifier cannot start with numbers, namedtuple fields + # cannot start with underscore. So... if s[0] == '_' or '0' <= s[0] <= '9': s = 'f' + s fields.append(s) nt = namedtuple("Record", fields) - self._nt_cache[key] = nt - while len(self._nt_cache) > self.MAX_CACHE: - self._nt_cache.popitem(last=False) - return nt + # Exposed for testability, and if someone wants to monkeypatch to tweak + # the cache size. + _cached_make_nt = lru_cache(512)(_do_make_nt) + class LoggingConnection(_connection): """A connection that logs all queries to a file or logger__ object. 
diff --git a/tests/test_extras_dictcursor.py b/tests/test_extras_dictcursor.py index e08da5f..92a25b6 100755 --- a/tests/test_extras_dictcursor.py +++ b/tests/test_extras_dictcursor.py @@ -592,27 +592,30 @@ class NamedTupleCursorTest(ConnectingTestCase): def test_max_cache(self): from psycopg2.extras import NamedTupleCursor - old_max_cache = NamedTupleCursor.MAX_CACHE - NamedTupleCursor.MAX_CACHE = 10 + from psycopg2.compat import lru_cache + + old_func = NamedTupleCursor._cached_make_nt + NamedTupleCursor._cached_make_nt = \ + lru_cache(8)(NamedTupleCursor._do_make_nt) try: - NamedTupleCursor._nt_cache.clear() + recs = [] curs = self.conn.cursor() for i in range(10): curs.execute("select 1 as f%s" % i) - curs.fetchone() + recs.append(curs.fetchone()) - self.assertEqual(len(NamedTupleCursor._nt_cache), 10) - for i in range(10): - self.assert_(('f%s' % i,) in NamedTupleCursor._nt_cache) + # Still in cache + curs.execute("select 1 as f9") + rec = curs.fetchone() + self.assert_(any(type(r) is type(rec) for r in recs)) - curs.execute("select 1 as f10") - curs.fetchone() - self.assertEqual(len(NamedTupleCursor._nt_cache), 10) - self.assert_(('f10',) in NamedTupleCursor._nt_cache) - self.assert_(('f0',) not in NamedTupleCursor._nt_cache) + # Gone from cache + curs.execute("select 1 as f0") + rec = curs.fetchone() + self.assert_(all(type(r) is not type(rec) for r in recs)) finally: - NamedTupleCursor.MAX_CACHE = old_max_cache + NamedTupleCursor._cached_make_nt = old_func def test_suite(): |