diff options
author | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2019-02-06 02:42:10 +0000 |
---|---|---|
committer | Daniele Varrazzo <daniele.varrazzo@gmail.com> | 2019-02-06 02:42:10 +0000 |
commit | f1e73507d095a71dc1173d0ccd81b6c324e692e2 (patch) | |
tree | 88669b2a2413bfc04ec68f7dca646e83c3e68643 | |
parent | 3f20f7934ad3efdf793ae81a61a842bec7adb85c (diff) | |
parent | 35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b (diff) | |
download | psycopg2-f1e73507d095a71dc1173d0ccd81b6c324e692e2.tar.gz |
Merge remote-tracking branch 'origin/fast-namedtuple'
Close #838
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | lib/_lru_cache.py | 104 | ||||
-rw-r--r-- | lib/compat.py | 5 | ||||
-rw-r--r-- | lib/extras.py | 31 | ||||
-rwxr-xr-x | tests/test_extras_dictcursor.py | 39 |
5 files changed, 170 insertions, 10 deletions
@@ -30,6 +30,7 @@ New features: maintain columns order (:ticket:`#177`). - Added `~psycopg2.extensions.Diagnostics.severity_nonlocalized` attribute on the `~psycopg2.extensions.Diagnostics` object (:ticket:`#783`). +- More efficient `~psycopg2.extras.NamedTupleCursor` (:ticket:`#838`). Other changes: diff --git a/lib/_lru_cache.py b/lib/_lru_cache.py new file mode 100644 index 0000000..1e2c52d --- /dev/null +++ b/lib/_lru_cache.py @@ -0,0 +1,104 @@ +""" +LRU cache implementation for Python 2.7 + +Ported from http://code.activestate.com/recipes/578078/ and simplified for our +use (only support maxsize > 0 and positional arguments). +""" + +from collections import namedtuple +from functools import update_wrapper +from threading import RLock + +_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) + + +def lru_cache(maxsize=100): + """Least-recently-used cache decorator. + + Arguments to the cached function must be hashable. + + See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + """ + def decorating_function(user_function): + + cache = dict() + stats = [0, 0] # make statistics updateable non-locally + HITS, MISSES = 0, 1 # names for the stats fields + cache_get = cache.get # bound method to lookup key or return None + _len = len # localize the global len() function + lock = RLock() # linkedlist updates aren't threadsafe + root = [] # root of the circular doubly linked list + root[:] = [root, root, None, None] # initialize by pointing to self + nonlocal_root = [root] # make updateable non-locally + PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields + + assert maxsize and maxsize > 0, "maxsize %s not supported" % maxsize + + def wrapper(*args): + # size limited caching that tracks accesses by recency + key = args + with lock: + link = cache_get(key) + if link is not None: + # record recent use of the key by moving it to the + # front of the list + root, = nonlocal_root + link_prev, link_next, key, result = link + link_prev[NEXT] = link_next + link_next[PREV] = link_prev + last = root[PREV] + last[NEXT] = root[PREV] = link + link[PREV] = last + link[NEXT] = root + stats[HITS] += 1 + return result + result = user_function(*args) + with lock: + root, = nonlocal_root + if key in cache: + # getting here means that this same key was added to the + # cache while the lock was released. since the link + # update is already done, we need only return the + # computed result and update the count of misses. + pass + elif _len(cache) >= maxsize: + # use the old root to store the new key and result + oldroot = root + oldroot[KEY] = key + oldroot[RESULT] = result + # empty the oldest link and make it the new root + root = nonlocal_root[0] = oldroot[NEXT] + oldkey = root[KEY] + # oldvalue = root[RESULT] + root[KEY] = root[RESULT] = None + # now update the cache dictionary for the new links + del cache[oldkey] + cache[key] = oldroot + else: + # put result in a new link at the front of the list + last = root[PREV] + link = [last, root, key, result] + last[NEXT] = root[PREV] = cache[key] = link + stats[MISSES] += 1 + return result + + def cache_info(): + """Report cache statistics""" + with lock: + return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) + + def cache_clear(): + """Clear the cache and cache statistics""" + with lock: + cache.clear() + root = nonlocal_root[0] + root[:] = [root, root, None, None] + stats[:] = [0, 0] + + wrapper.__wrapped__ = user_function + wrapper.cache_info = cache_info + wrapper.cache_clear = cache_clear + return update_wrapper(wrapper, user_function) + + return decorating_function diff --git a/lib/compat.py b/lib/compat.py index cfd5a88..185b8f6 100644 --- a/lib/compat.py +++ b/lib/compat.py @@ -1,10 +1,15 @@ import sys +__all__ = ['string_types', 'text_type', 'lru_cache'] + if sys.version_info[0] == 2: # Python 2 string_types = basestring, text_type = unicode + from ._lru_cache import lru_cache + else: # Python 3 string_types = str, text_type = str + from functools import lru_cache diff --git a/lib/extras.py b/lib/extras.py index 76bea85..d73c5b2 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -35,9 +35,10 @@ import logging as _logging import psycopg2 from psycopg2 import extensions as _ext -from psycopg2.extensions import cursor as _cursor -from psycopg2.extensions import connection as _connection -from psycopg2.extensions import adapt as _A, quote_ident +from .extensions import cursor as _cursor +from .extensions import connection as _connection +from .extensions import adapt as _A, quote_ident +from .compat import lru_cache from psycopg2._psycopg import ( # noqa REPLICATION_PHYSICAL, REPLICATION_LOGICAL, @@ -330,6 +331,7 @@ class NamedTupleCursor(_cursor): "abc'def" """ Record = None + MAX_CACHE = 1024 def execute(self, query, vars=None): self.Record = None @@ -381,21 +383,30 @@ class NamedTupleCursor(_cursor): except StopIteration: return + # ascii except alnum and underscore + _re_clean = _re.compile( + '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']') + def _make_nt(self): - # ascii except alnum and underscore - nochars = ' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~' - re_clean = _re.compile('[' + _re.escape(nochars) + ']') + key = tuple(d[0] for d in self.description) if self.description else () + return self._cached_make_nt(key) - def f(s): - s = re_clean.sub('_', s) + def _do_make_nt(self, key): + fields = [] + for s in key: + s = self._re_clean.sub('_', s) # Python identifier cannot start with numbers, namedtuple fields # cannot start with underscore. So... if s[0] == '_' or '0' <= s[0] <= '9': s = 'f' + s + fields.append(s) - return s + nt = namedtuple("Record", fields) + return nt - return namedtuple("Record", [f(d[0]) for d in self.description or ()]) + # Exposed for testability, and if someone wants to monkeypatch to tweak + # the cache size. + _cached_make_nt = lru_cache(512)(_do_make_nt) class LoggingConnection(_connection): diff --git a/tests/test_extras_dictcursor.py b/tests/test_extras_dictcursor.py index 38f484d..92a25b6 100755 --- a/tests/test_extras_dictcursor.py +++ b/tests/test_extras_dictcursor.py @@ -578,6 +578,45 @@ class NamedTupleCursorTest(ConnectingTestCase): for i, t in enumerate(curs): self.assertEqual(i + 1, curs.rownumber) + def test_cache(self): + curs = self.conn.cursor() + curs.execute("select 10 as a, 20 as b") + r1 = curs.fetchone() + curs.execute("select 10 as a, 20 as c") + r2 = curs.fetchone() + curs.execute("select 10 as a, 30 as b") + r3 = curs.fetchone() + + self.assert_(type(r1) is type(r3)) + self.assert_(type(r1) is not type(r2)) + + def test_max_cache(self): + from psycopg2.extras import NamedTupleCursor + from psycopg2.compat import lru_cache + + old_func = NamedTupleCursor._cached_make_nt + NamedTupleCursor._cached_make_nt = \ + lru_cache(8)(NamedTupleCursor._do_make_nt) + try: + recs = [] + curs = self.conn.cursor() + for i in range(10): + curs.execute("select 1 as f%s" % i) + recs.append(curs.fetchone()) + + # Still in cache + curs.execute("select 1 as f9") + rec = curs.fetchone() + self.assert_(any(type(r) is type(rec) for r in recs)) + + # Gone from cache + curs.execute("select 1 as f0") + rec = curs.fetchone() + self.assert_(all(type(r) is not type(rec) for r in recs)) + + finally: + NamedTupleCursor._cached_make_nt = old_func + def test_suite(): return unittest.TestLoader().loadTestsFromName(__name__) |