author     Daniele Varrazzo <daniele.varrazzo@gmail.com>  2019-02-06 02:42:10 +0000
committer  Daniele Varrazzo <daniele.varrazzo@gmail.com>  2019-02-06 02:42:10 +0000
commit     f1e73507d095a71dc1173d0ccd81b6c324e692e2 (patch)
tree       88669b2a2413bfc04ec68f7dca646e83c3e68643
parent     3f20f7934ad3efdf793ae81a61a842bec7adb85c (diff)
parent     35ec7ad9c1ecb165eaeb2b8d4dae84ed8bad182b (diff)
download   psycopg2-f1e73507d095a71dc1173d0ccd81b6c324e692e2.tar.gz
Merge remote-tracking branch 'origin/fast-namedtuple'
Close #838
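
The merged branch speeds up `NamedTupleCursor` by building the namedtuple
class once per distinct set of column names and caching it on the cursor
class with an LRU cache, instead of regenerating it on every execute().
A minimal sketch of the idea follows; it is illustrative only (plain
stdlib, `make_record` is a made-up name, not psycopg2 API):

    from collections import namedtuple
    from functools import lru_cache

    @lru_cache(maxsize=512)
    def make_record(column_names):
        # column_names is a hashable tuple of strings, usable as a cache key;
        # creating the class once per column set avoids the comparatively
        # expensive namedtuple() call on every query execution.
        return namedtuple("Record", column_names)

    # Queries returning the same columns share a single class:
    assert make_record(("id", "data")) is make_record(("id", "data"))
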
-rw-r--r--  NEWS                              1
-rw-r--r--  lib/_lru_cache.py               104
-rw-r--r--  lib/compat.py                     5
-rw-r--r--  lib/extras.py                    31
-rwxr-xr-x  tests/test_extras_dictcursor.py  39
5 files changed, 170 insertions, 10 deletions
diff --git a/NEWS b/NEWS
index 1558bea..60e14e5 100644
--- a/NEWS
+++ b/NEWS
@@ -30,6 +30,7 @@ New features:
   maintain columns order (:ticket:`#177`).
 - Added `~psycopg2.extensions.Diagnostics.severity_nonlocalized` attribute on
   the `~psycopg2.extensions.Diagnostics` object (:ticket:`#783`).
+- More efficient `~psycopg2.extras.NamedTupleCursor` (:ticket:`#838`).
 
 Other changes:
diff --git a/lib/_lru_cache.py b/lib/_lru_cache.py
new file mode 100644
index 0000000..1e2c52d
--- /dev/null
+++ b/lib/_lru_cache.py
@@ -0,0 +1,104 @@
+"""
+LRU cache implementation for Python 2.7
+
+Ported from http://code.activestate.com/recipes/578078/ and simplified for our
+use (only support maxsize > 0 and positional arguments).
+"""
+
+from collections import namedtuple
+from functools import update_wrapper
+from threading import RLock
+
+_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
+
+
+def lru_cache(maxsize=100):
+    """Least-recently-used cache decorator.
+
+    Arguments to the cached function must be hashable.
+
+    See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
+
+    """
+    def decorating_function(user_function):
+
+        cache = dict()
+        stats = [0, 0]                  # make statistics updateable non-locally
+        HITS, MISSES = 0, 1             # names for the stats fields
+        cache_get = cache.get           # bound method to lookup key or return None
+        _len = len                      # localize the global len() function
+        lock = RLock()                  # linkedlist updates aren't threadsafe
+        root = []                       # root of the circular doubly linked list
+        root[:] = [root, root, None, None]      # initialize by pointing to self
+        nonlocal_root = [root]          # make updateable non-locally
+        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields
+
+        assert maxsize and maxsize > 0, "maxsize %s not supported" % maxsize
+
+        def wrapper(*args):
+            # size limited caching that tracks accesses by recency
+            key = args
+            with lock:
+                link = cache_get(key)
+                if link is not None:
+                    # record recent use of the key by moving it to the
+                    # front of the list
+                    root, = nonlocal_root
+                    link_prev, link_next, key, result = link
+                    link_prev[NEXT] = link_next
+                    link_next[PREV] = link_prev
+                    last = root[PREV]
+                    last[NEXT] = root[PREV] = link
+                    link[PREV] = last
+                    link[NEXT] = root
+                    stats[HITS] += 1
+                    return result
+            result = user_function(*args)
+            with lock:
+                root, = nonlocal_root
+                if key in cache:
+                    # getting here means that this same key was added to the
+                    # cache while the lock was released. since the link
+                    # update is already done, we need only return the
+                    # computed result and update the count of misses.
+                    pass
+                elif _len(cache) >= maxsize:
+                    # use the old root to store the new key and result
+                    oldroot = root
+                    oldroot[KEY] = key
+                    oldroot[RESULT] = result
+                    # empty the oldest link and make it the new root
+                    root = nonlocal_root[0] = oldroot[NEXT]
+                    oldkey = root[KEY]
+                    # oldvalue = root[RESULT]
+                    root[KEY] = root[RESULT] = None
+                    # now update the cache dictionary for the new links
+                    del cache[oldkey]
+                    cache[key] = oldroot
+                else:
+                    # put result in a new link at the front of the list
+                    last = root[PREV]
+                    link = [last, root, key, result]
+                    last[NEXT] = root[PREV] = cache[key] = link
+                stats[MISSES] += 1
+            return result
+
+        def cache_info():
+            """Report cache statistics"""
+            with lock:
+                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
+
+        def cache_clear():
+            """Clear the cache and cache statistics"""
+            with lock:
+                cache.clear()
+                root = nonlocal_root[0]
+                root[:] = [root, root, None, None]
+                stats[:] = [0, 0]
+
+        wrapper.__wrapped__ = user_function
+        wrapper.cache_info = cache_info
+        wrapper.cache_clear = cache_clear
+        return update_wrapper(wrapper, user_function)
+
+    return decorating_function
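
For context, a hedged usage sketch of the decorator defined above (standalone
example, not part of the patch; `expensive` is an illustrative name). On
Python 3 the package re-exports `functools.lru_cache` instead, as the
compat.py hunk below shows:

    from psycopg2.compat import lru_cache  # backport on Python 2, functools on Python 3

    @lru_cache(maxsize=2)
    def expensive(x):
        # stands in for any costly function of hashable positional arguments
        return x * x

    expensive(1)
    expensive(2)
    expensive(1)
    print(expensive.cache_info())   # CacheInfo(hits=1, misses=2, maxsize=2, currsize=2)
    expensive.cache_clear()
    print(expensive.cache_info())   # CacheInfo(hits=0, misses=0, maxsize=2, currsize=0)
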
diff --git a/lib/compat.py b/lib/compat.py
index cfd5a88..185b8f6 100644
--- a/lib/compat.py
+++ b/lib/compat.py
@@ -1,10 +1,15 @@
 import sys
 
+__all__ = ['string_types', 'text_type', 'lru_cache']
+
 if sys.version_info[0] == 2:
     # Python 2
     string_types = basestring,
     text_type = unicode
+    from ._lru_cache import lru_cache
+
 else:
     # Python 3
     string_types = str,
     text_type = str
+    from functools import lru_cache
diff --git a/lib/extras.py b/lib/extras.py
index 76bea85..d73c5b2 100644
--- a/lib/extras.py
+++ b/lib/extras.py
@@ -35,9 +35,10 @@ import logging as _logging
 
 import psycopg2
 from psycopg2 import extensions as _ext
-from psycopg2.extensions import cursor as _cursor
-from psycopg2.extensions import connection as _connection
-from psycopg2.extensions import adapt as _A, quote_ident
+from .extensions import cursor as _cursor
+from .extensions import connection as _connection
+from .extensions import adapt as _A, quote_ident
+from .compat import lru_cache
 
 from psycopg2._psycopg import (  # noqa
     REPLICATION_PHYSICAL, REPLICATION_LOGICAL,
@@ -330,6 +331,7 @@ class NamedTupleCursor(_cursor):
         "abc'def"
     """
     Record = None
+    MAX_CACHE = 1024
 
     def execute(self, query, vars=None):
         self.Record = None
@@ -381,21 +383,30 @@ class NamedTupleCursor(_cursor):
         except StopIteration:
             return
 
+    # ascii except alnum and underscore
+    _re_clean = _re.compile(
+        '[' + _re.escape(' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~') + ']')
+
     def _make_nt(self):
-        # ascii except alnum and underscore
-        nochars = ' !"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~'
-        re_clean = _re.compile('[' + _re.escape(nochars) + ']')
+        key = tuple(d[0] for d in self.description) if self.description else ()
+        return self._cached_make_nt(key)
 
-        def f(s):
-            s = re_clean.sub('_', s)
+    def _do_make_nt(self, key):
+        fields = []
+        for s in key:
+            s = self._re_clean.sub('_', s)
             # Python identifier cannot start with numbers, namedtuple fields
             # cannot start with underscore. So...
             if s[0] == '_' or '0' <= s[0] <= '9':
                 s = 'f' + s
+            fields.append(s)
 
-            return s
+        nt = namedtuple("Record", fields)
+        return nt
 
-        return namedtuple("Record", [f(d[0]) for d in self.description or ()])
+    # Exposed for testability, and if someone wants to monkeypatch to tweak
+    # the cache size.
+    _cached_make_nt = lru_cache(512)(_do_make_nt)
 
 
 class LoggingConnection(_connection):
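
The `_cached_make_nt` attribute above is exposed on purpose so the 512-entry
default can be tuned; the test added below swaps in a smaller cache the same
way. A hedged sketch of that monkeypatching (the 2048 figure is arbitrary):

    from psycopg2.extras import NamedTupleCursor
    from psycopg2.compat import lru_cache

    # Rebuild the cached factory around the uncached _do_make_nt with a larger LRU.
    NamedTupleCursor._cached_make_nt = lru_cache(2048)(NamedTupleCursor._do_make_nt)
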
diff --git a/tests/test_extras_dictcursor.py b/tests/test_extras_dictcursor.py
index 38f484d..92a25b6 100755
--- a/tests/test_extras_dictcursor.py
+++ b/tests/test_extras_dictcursor.py
@@ -578,6 +578,45 @@ class NamedTupleCursorTest(ConnectingTestCase):
         for i, t in enumerate(curs):
             self.assertEqual(i + 1, curs.rownumber)
 
+    def test_cache(self):
+        curs = self.conn.cursor()
+        curs.execute("select 10 as a, 20 as b")
+        r1 = curs.fetchone()
+        curs.execute("select 10 as a, 20 as c")
+        r2 = curs.fetchone()
+        curs.execute("select 10 as a, 30 as b")
+        r3 = curs.fetchone()
+
+        self.assert_(type(r1) is type(r3))
+        self.assert_(type(r1) is not type(r2))
+
+    def test_max_cache(self):
+        from psycopg2.extras import NamedTupleCursor
+        from psycopg2.compat import lru_cache
+
+        old_func = NamedTupleCursor._cached_make_nt
+        NamedTupleCursor._cached_make_nt = \
+            lru_cache(8)(NamedTupleCursor._do_make_nt)
+        try:
+            recs = []
+            curs = self.conn.cursor()
+            for i in range(10):
+                curs.execute("select 1 as f%s" % i)
+                recs.append(curs.fetchone())
+
+            # Still in cache
+            curs.execute("select 1 as f9")
+            rec = curs.fetchone()
+            self.assert_(any(type(r) is type(rec) for r in recs))
+
+            # Gone from cache
+            curs.execute("select 1 as f0")
+            rec = curs.fetchone()
+            self.assert_(all(type(r) is not type(rec) for r in recs))
+
+        finally:
+            NamedTupleCursor._cached_make_nt = old_func
+
 
 def test_suite():
     return unittest.TestLoader().loadTestsFromName(__name__)