author     ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>    2016-08-04 00:02:21 +0000
committer  ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>    2016-08-04 00:02:21 +0000
commit     81875f08ec76719a37ad444c8a7fd5ddf2e52896 (patch)
tree       6aac6739c4f931ef40de50eedd0093476e91d3f6
parent     5ceb5926274042bdb1a2d97088c5c4ef89c2cbb0 (diff)
download   pyparsing-81875f08ec76719a37ad444c8a7fd5ddf2e52896.tar.gz
LRU rework to use OrderedDict instead, better cross-version Python compatibility.
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@382 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r--   src/CHANGES        16
-rw-r--r--   src/pyparsing.py   127
2 files changed, 93 insertions, 50 deletions
diff --git a/src/CHANGES b/src/CHANGES
index 5b79a35..47f4858 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -4,15 +4,13 @@ Change Log
Version 2.1.6 -
------------------------------
-- *Major packrat upgrade*, to take advantage of functools.lru_cache,
- inspired by patch provided by Tal Einat - many, many, thanks to Tal
- for working on this! Tal's tests show faster parsing performance
- (2X in some tests), *and* memory reduction from 3GB down to ~100MB!
- functools.lru_cache is only available in Python 3.2 and later,
- If running this on an earlier Python, this change will also try to
- import lru_cache from functools32, which provides a backport of
- lru_cache to earlier Python versions. If lru_cache cannot be imported,
- packratting will revert to the previous unbounded dict cache.
+- *Major packrat upgrade*, inspired by patch provided by Tal Einat -
+ many, many, thanks to Tal for working on this! Tal's tests show
+ faster parsing performance (2X in some tests), *and* memory reduction
+ from 3GB down to ~100MB! Requires no changes to existing code using
+ packratting. (Uses OrderedDict, available in Python 2.7 and later.
+ For Python 2.6 users, will attempt to import from ordereddict
+ backport. If not present, will implement pure-Python Fifo dict.)
- Fixed bug in pyparsing_common.numeric, integers were parsed as floats.
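The bounded cache described in the CHANGES entry above (the _FifoCache added in pyparsing.py below) leans on OrderedDict preserving insertion order: once the cache grows past its size limit, popitem(last=False) evicts the oldest entry. A minimal standalone sketch of that FIFO eviction idea, for illustration only and not code from this patch:

    from collections import OrderedDict

    size = 3
    cache = OrderedDict()
    for key in "abcd":
        cache[key] = key.upper()
        if len(cache) > size:
            # drop the oldest insertion, the same FIFO behaviour the bounded cache relies on
            cache.popitem(last=False)

    print(list(cache))   # ['b', 'c', 'd']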
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 5fcbf5e..43216a5 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.6"
-__versionTime__ = "27 Jul 2016 19:13 UTC"
+__versionTime__ = "03 Aug 2016 23:56 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -71,6 +71,7 @@ import sre_constants
import collections
import pprint
import traceback
+import types
from datetime import datetime
try:
@@ -79,13 +80,12 @@ except ImportError:
    from threading import RLock
try:
-    from functools import lru_cache as _lru_cache
+    from collections import OrderedDict as _OrderedDict
except ImportError:
    try:
-        from functools32 import lru_cache as _lru_cache
+        from ordereddict import OrderedDict as _OrderedDict
    except ImportError:
-        _lru_cache = None
-
+        _OrderedDict = None
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
@@ -1150,28 +1150,75 @@ class ParserElement(object):
        else:
            return True
+    class _UnboundedCache(object):
+        def __init__(self):
+            cache = {}
+            self.not_in_cache = not_in_cache = object()
-    # argument cache for optimizing repeated calls when backtracking through recursive expressions
-    packrat_cache = {} # default to ordinary dict in case lru_cache not available
-    packrat_cache_lock = RLock()
-    packrat_cache_stats = [0, 0]
+            def get(self, key):
+                return cache.get(key, not_in_cache)
-    class _LruDictCache(object):
-        """wrapper class, to make lru_cache look like a dict
-        """
-        def __init__(self, size=None):
-            self._cache = _lru_cache(size)(lambda arg: list())
-            self.maxsize = size
+            def set(self, key, value):
+                cache[key] = value
+
+            def clear(self):
+                cache.clear()
+
+            self.get = types.MethodType(get, self)
+            self.set = types.MethodType(set, self)
+            self.clear = types.MethodType(clear, self)
+
+    if _OrderedDict is not None:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = _OrderedDict()
-        def setdefault(self, key, default):
-            return self._cache(key)
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
-        def clear(self):
-            self._cache.cache_clear()
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.popitem(False)
-        def stats(self):
-            return self._cache.cache_info()
+                def clear(self):
+                    cache.clear()
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
+    else:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = {}
+                key_fifo = collections.deque([], size)
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.pop(key_fifo.popleft(), None)
+                    key_fifo.append(key)
+
+                def clear(self):
+                    cache.clear()
+                    key_fifo.clear()
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
+    packrat_cache_lock = RLock()
+    packrat_cache_stats = [0, 0]
    # this method gets repeatedly called during backtracking with the same arguments -
    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
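All three cache classes added in the hunk above share one construction pattern: the actual storage exists only as closure variables inside __init__, and the get/set/clear closures are attached to each instance with types.MethodType, so no cache state is exposed as an ordinary attribute. A stripped-down sketch of that binding pattern, separate from the patch itself:

    import types

    class ClosureCache(object):
        # toy example of the closure-plus-MethodType style used by the caches above
        def __init__(self):
            cache = {}                                   # visible only to the closures below
            self.not_in_cache = not_in_cache = object()  # unique "missing" sentinel

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value

            # bind the closures as methods of this particular instance
            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)

    c = ClosureCache()
    c.set("k", 42)
    print(c.get("k"))                          # 42
    print(c.get("missing") is c.not_in_cache)  # True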
@@ -1179,29 +1226,24 @@ class ParserElement(object):
        HIT, MISS = 0, 1
        lookup = (self, instring, loc, callPreParse, doActions)
        with ParserElement.packrat_cache_lock:
-            container = ParserElement.packrat_cache.setdefault(lookup, [])
-            if container:
-                # retrieved non-empty container from cache, so this is a cache hit
-                # get values or exception from container
-                value = container[0]
-                ParserElement.packrat_cache_stats[HIT] += 1
-                if isinstance(value, Exception):
-                    raise value
-                return (value[0], value[1].copy())
-            else:
-                # retrieved container is empty, so this is a cache miss (first
-                # occurrence of this lookup key); must populate the container
+            cache = ParserElement.packrat_cache
+            value = cache.get(lookup)
+            if value is cache.not_in_cache:
                ParserElement.packrat_cache_stats[MISS] += 1
                try:
                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
-                    # saved parsed value into cached container
-                    container.append((value[0], value[1].copy()))
-                    return value
                except ParseBaseException as pe:
-                    # construct copy of exception for cacheing (omit traceback)
-                    exc_value = pe.__class__(*pe.args)
-                    container.append(exc_value)
+                    # cache a copy of the exception, without the traceback
+                    cache.set(lookup, pe.__class__(*pe.args))
                    raise
+                else:
+                    cache.set(lookup, (value[0], value[1].copy()))
+                    return value
+            else:
+                ParserElement.packrat_cache_stats[HIT] += 1
+                if isinstance(value, Exception):
+                    raise value
+                return (value[0], value[1].copy())
    _parse = _parseNoCache
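The reworked _parseCache keeps the HIT/MISS bookkeeping from the previous version, so cache effectiveness can still be inspected through ParserElement.packrat_cache_stats. A rough usage sketch (the counters are an internal detail, and the grammar below is just an arbitrary example):

    import pyparsing as pp

    pp.ParserElement.enablePackrat()   # installs _parseCache as ParserElement._parse

    word = pp.Word(pp.alphas)
    expr = pp.OneOrMore(pp.Group(word + pp.Optional(pp.Word(pp.nums))))
    expr.parseString("alpha 1 beta 2 gamma")

    hits, misses = pp.ParserElement.packrat_cache_stats   # [HIT, MISS] counters
    print("packrat cache: %d hits, %d misses" % (hits, misses))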
@@ -1236,10 +1278,13 @@ class ParserElement(object):
"""
if not ParserElement._packratEnabled:
ParserElement._packratEnabled = True
- if _lru_cache:
- ParserElement.packrat_cache = ParserElement._LruDictCache(cache_size_limit)
+ if cache_size_limit is None:
+ ParserElement.packrat_cache = ParserElement._UnboundedCache()
+ else:
+ ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
ParserElement._parse = ParserElement._parseCache
+
def parseString( self, instring, parseAll=False ):
"""Execute the parse expression with the given string.
This is the main interface to the client code, once the complete
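With this change, the cache_size_limit argument to enablePackrat() selects the cache implementation instead of being passed to lru_cache: an integer gives the bounded FIFO cache, None gives the unbounded one. A hypothetical call site:

    import pyparsing as pp

    # bounded FIFO cache (OrderedDict-backed where available, deque fallback otherwise)
    pp.ParserElement.enablePackrat(cache_size_limit=256)

    # or, for the unbounded dict-backed cache:
    # pp.ParserElement.enablePackrat(cache_size_limit=None)

    # note: per the guard in the hunk above, enablePackrat() is a no-op
    # once packratting has already been enabled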