Use a proper LRU cache for namedtuples (branch: fast-namedtuple)

The previous cache didn't refresh entries on last use, so it was not a true LRU. Use the stdlib version for py3 and one of our own for py2. The max size is set to 512, which should be fine for everyone (tweaking is still possible by monkeypatching, as the tests do, but I don't want to make an interface of it).
author: Daniele Varrazzo <daniele.varrazzo@gmail.com> 2019-02-02 19:21:39 +0000
committer: Daniele Varrazzo <daniele.varrazzo@gmail.com> 2019-02-02 19:29:20 +0000
commit: 35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b (patch)
tree: 88669b2a2413bfc04ec68f7dca646e83c3e68643
parent: 805527fcd6bedc2671a37c59299c1fa9148f9e6e (diff)
download: psycopg2-35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b.tar.gz
4 files changed, 138 insertions, 26 deletions
diff --git a/lib/_lru_cache.py b/lib/_lru_cache.py
new file mode 100644
index 0000000..1e2c52d
--- /dev/null
+++ b/lib/_lru_cache.py
@@ -0,0 +1,104 @@
+"""
+LRU cache implementation for Python 2.7
+
+Ported from http://code.activestate.com/recipes/578078/ and simplified for our
+use (only support maxsize > 0 and positional arguments).
+"""
+
+from collections import namedtuple
+from functools import update_wrapper
+from threading import RLock
+
+_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
+
+
+def lru_cache(maxsize=100):
+    """Least-recently-used cache decorator.
+
+    Arguments to the cached function must be hashable.
+
+    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
+
+    """
+    def decorating_function(user_function):
+
+        cache = dict()
+        stats = [0, 0]                  # make statistics updateable non-locally
+        HITS, MISSES = 0, 1             # names for the stats fields
+        cache_get = cache.get           # bound method to lookup key or return None
+        _len = len                      # localize the global len() function
+        lock = RLock()                  # linkedlist updates aren't threadsafe
+        root = []                       # root of the circular doubly linked list
+        root[:] = [root, root, None, None]      # initialize by pointing to self
+        nonlocal_root = [root]                  # make updateable non-locally
+        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields
+
+        assert maxsize and maxsize > 0, "maxsize %s not supported" % maxsize
+
+        def wrapper(*args):
+            # size limited caching that tracks accesses by recency
+            key = args
+            with lock:
+                link = cache_get(key)
+                if link is not None:
+                    # record recent use of the key by moving it to the
+                    # front of the list
+                    root, = nonlocal_root
+                    link_prev, link_next, key, result = link
+                    link_prev[NEXT] = link_next
+                    link_next[PREV] = link_prev
+                    last = root[PREV]
+                    last[NEXT] = root[PREV] = link
+                    link[PREV] = last
+                    link[NEXT] = root
+                    stats[HITS] += 1
+                    return result
+            result = user_function(*args)
+            with lock:
+                root, = nonlocal_root
+                if key in cache:
+                    # getting here means that this same key was added to the
+                    # cache while the lock was released.  since the link
+                    # update is already done, we need only return the
+                    # computed result and update the count of misses.
+                    pass
+                elif _len(cache) >= maxsize:
+                    # use the old root to store the new key and result
+                    oldroot = root
+                    oldroot[KEY] = key
+                    oldroot[RESULT] = result
+                    # empty the oldest link and make it the new root
+                    root = nonlocal_root[0] = oldroot[NEXT]
+                    oldkey = root[KEY]
+                    # oldvalue = root[RESULT]
+                    root[KEY] = root[RESULT] = None
+                    # now update the cache dictionary for the new links
+                    del cache[oldkey]
+                    cache[key] = oldroot
+                else:
+                    # put result in a new link at the front of the list
+                    last = root[PREV]
+                    link = [last, root, key, result]
+                    last[NEXT] = root[PREV] = cache[key] = link
+                stats[MISSES] += 1
+            return result
+
+        def cache_info():
+            """Report cache statistics"""
+            with lock:
+                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
+
+        def cache_clear():
+            """Clear the cache and cache statistics"""
+            with lock:
+                cache.clear()
+                root = nonlocal_root[0]
+                root[:] = [root, root, None, None]
+                stats[:] = [0, 0]
+
+        wrapper.__wrapped__ = user_function
+        wrapper.cache_info = cache_info
+        wrapper.cache_clear = cache_clear
+        return update_wrapper(wrapper, user_function)
+
+    return decorating_function
diff --git a/lib/compat.py b/lib/compat.py
index cfd5a88..185b8f6 100644
--- a/lib/compat.py
+++ b/lib/compat.py
@@ -1,10 +1,15 @@
 import sys
 
+__all__ = ['string_types', 'text_type', 'lru_cache']
+
 if sys.version_info[0] == 2:
     # Python 2
     string_types = basestring,
     text_type = unicode
+    from ._lru_cache import lru_cache
+
 else:
     # Python 3
     string_types = str,
     text_type = str
+    from functools import lru_cache
diff --git a/lib/extras.py b/lib/extras.py
index a4012e2..d73c5b2 100644
--- a/lib/extras.py
+++ b/lib/extras.py
@@ -35,9 +35,10 @@ import logging as _logging
 
 import psycopg2
 from psycopg2 import extensions as _ext
-from psycopg2.extensions import cursor as _cursor
-from psycopg2.extensions import connection as _connection
-from psycopg2.extensions import adapt as _A, quote_ident
+from .extensions import cursor as _cursor
+from .extensions import connection as _connection
+from .extensions import adapt as _A, quote_ident
+from .compat import lru_cache
 
 from psycopg2._psycopg import (                             # noqa
     REPLICATION_PHYSICAL, REPLICATION_LOGICAL,
@@ -386,28 +387,27 @@ class NamedTupleCursor(_cursor):
     _re_clean = _re.compile(
         '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']')
 
-    _nt_cache = OrderedDict()
-
     def _make_nt(self):
-        key = tuple(d[0] for d in (self.description or ()))
-        nt = self._nt_cache.get(key)
-        if nt is not None:
-            return nt
+        key = tuple(d[0] for d in self.description) if self.description else ()
+        return self._cached_make_nt(key)
 
+    def _do_make_nt(self, key):
         fields = []
         for s in key:
             s = self._re_clean.sub('_', s)
+            # Python identifier cannot start with numbers, namedtuple fields
+            # cannot start with underscore. So...
             if s[0] == '_' or '0' <= s[0] <= '9':
                 s = 'f' + s
             fields.append(s)
 
         nt = namedtuple("Record", fields)
-        self._nt_cache[key] = nt
-        while len(self._nt_cache) > self.MAX_CACHE:
-            self._nt_cache.popitem(last=False)
-
         return nt
 
+    # Exposed for testability, and if someone wants to monkeypatch to tweak
+    # the cache size.
+    _cached_make_nt = lru_cache(512)(_do_make_nt)
+
 
 class LoggingConnection(_connection):
     """A connection that logs all queries to a file or logger__ object.
diff --git a/tests/test_extras_dictcursor.py b/tests/test_extras_dictcursor.py
index e08da5f..92a25b6 100755
--- a/tests/test_extras_dictcursor.py
+++ b/tests/test_extras_dictcursor.py
@@ -592,27 +592,30 @@ class NamedTupleCursorTest(ConnectingTestCase):
 
     def test_max_cache(self):
         from psycopg2.extras import NamedTupleCursor
-        old_max_cache = NamedTupleCursor.MAX_CACHE
-        NamedTupleCursor.MAX_CACHE = 10
+        from psycopg2.compat import lru_cache
+
+        old_func = NamedTupleCursor._cached_make_nt
+        NamedTupleCursor._cached_make_nt = \
+            lru_cache(8)(NamedTupleCursor._do_make_nt)
         try:
-            NamedTupleCursor._nt_cache.clear()
+            recs = []
             curs = self.conn.cursor()
             for i in range(10):
                 curs.execute("select 1 as f%s" % i)
-                curs.fetchone()
+                recs.append(curs.fetchone())
 
-            self.assertEqual(len(NamedTupleCursor._nt_cache), 10)
-            for i in range(10):
-                self.assert_(('f%s' % i,) in NamedTupleCursor._nt_cache)
+            # Still in cache
+            curs.execute("select 1 as f9")
+            rec = curs.fetchone()
+            self.assert_(any(type(r) is type(rec) for r in recs))
 
-            curs.execute("select 1 as f10")
-            curs.fetchone()
-            self.assertEqual(len(NamedTupleCursor._nt_cache), 10)
-            self.assert_(('f10',) in NamedTupleCursor._nt_cache)
-            self.assert_(('f0',) not in NamedTupleCursor._nt_cache)
+            # Gone from cache
+            curs.execute("select 1 as f0")
+            rec = curs.fetchone()
+            self.assert_(all(type(r) is not type(rec) for r in recs))
 
         finally:
-            NamedTupleCursor.MAX_CACHE = old_max_cache
+            NamedTupleCursor._cached_make_nt = old_func
 
 
 def test_suite():
author	Daniele Varrazzo <daniele.varrazzo@gmail.com>	2019-02-02 19:21:39 +0000
committer	Daniele Varrazzo <daniele.varrazzo@gmail.com>	2019-02-02 19:29:20 +0000
commit	35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b (patch)
tree	88669b2a2413bfc04ec68f7dca646e83c3e68643
parent	805527fcd6bedc2671a37c59299c1fa9148f9e6e (diff)
download	psycopg2-35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b.tar.gz