author     ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>    2016-08-04 00:02:21 +0000
committer  ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>    2016-08-04 00:02:21 +0000
commit     81875f08ec76719a37ad444c8a7fd5ddf2e52896 (patch)
tree       6aac6739c4f931ef40de50eedd0093476e91d3f6
parent     5ceb5926274042bdb1a2d97088c5c4ef89c2cbb0 (diff)
download   pyparsing-81875f08ec76719a37ad444c8a7fd5ddf2e52896.tar.gz
LRU rework to use OrderedDict instead, better cross-version Python compatibility.
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@382 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r--   src/CHANGES        16
-rw-r--r--   src/pyparsing.py   127
2 files changed, 93 insertions, 50 deletions
diff --git a/src/CHANGES b/src/CHANGES
index 5b79a35..47f4858 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -4,15 +4,13 @@ Change Log
Version 2.1.6 -
------------------------------
-- *Major packrat upgrade*, to take advantage of functools.lru_cache,
- inspired by patch provided by Tal Einat - many, many, thanks to Tal
- for working on this! Tal's tests show faster parsing performance
- (2X in some tests), *and* memory reduction from 3GB down to ~100MB!
- functools.lru_cache is only available in Python 3.2 and later,
- If running this on an earlier Python, this change will also try to
- import lru_cache from functools32, which provides a backport of
- lru_cache to earlier Python versions. If lru_cache cannot be imported,
- packratting will revert to the previous unbounded dict cache.
+- *Major packrat upgrade*, inspired by patch provided by Tal Einat -
+ many, many, thanks to Tal for working on this! Tal's tests show
+ faster parsing performance (2X in some tests), *and* memory reduction
+ from 3GB down to ~100MB! Requires no changes to existing code using
+ packratting. (Uses OrderedDict, available in Python 2.7 and later.
+ For Python 2.6 users, will attempt to import from ordereddict
+ backport. If not present, will implement pure-Python Fifo dict.)
- Fixed bug in pyparsing_common.numeric, integers were parsed as floats.
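The bounded cache described in the CHANGES entry above (the _FifoCache added in pyparsing.py below) leans on OrderedDict preserving insertion order: once the cache grows past its size limit, popitem(last=False) evicts the oldest entry. A minimal standalone sketch of that FIFO eviction idea, for illustration only and not code from this patch:

    from collections import OrderedDict

    size = 3
    cache = OrderedDict()
    for key in "abcd":
        cache[key] = key.upper()
        if len(cache) > size:
            # drop the oldest insertion, the same FIFO behaviour the bounded cache relies on
            cache.popitem(last=False)

    print(list(cache))   # ['b', 'c', 'd']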
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 5fcbf5e..43216a5 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.6"
-__versionTime__ = "27 Jul 2016 19:13 UTC"
+__versionTime__ = "03 Aug 2016 23:56 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -71,6 +71,7 @@ import sre_constants
import collections
import pprint
import traceback
+import types
from datetime import datetime
try:
@@ -79,13 +80,12 @@ except ImportError:
    from threading import RLock
try:
-    from functools import lru_cache as _lru_cache
+    from collections import OrderedDict as _OrderedDict
except ImportError:
    try:
-        from functools32 import lru_cache as _lru_cache
+        from ordereddict import OrderedDict as _OrderedDict
    except ImportError:
-        _lru_cache = None
-
+        _OrderedDict = None
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
@@ -1150,28 +1150,75 @@ class ParserElement(object):
        else:
            return True
+    class _UnboundedCache(object):
+        def __init__(self):
+            cache = {}
+            self.not_in_cache = not_in_cache = object()
-    # argument cache for optimizing repeated calls when backtracking through recursive expressions
-    packrat_cache = {} # default to ordinary dict in case lru_cache not available
-    packrat_cache_lock = RLock()
-    packrat_cache_stats = [0, 0]
+            def get(self, key):
+                return cache.get(key, not_in_cache)
-    class _LruDictCache(object):
-        """wrapper class, to make lru_cache look like a dict
-        """
-        def __init__(self, size=None):
-            self._cache = _lru_cache(size)(lambda arg: list())
-            self.maxsize = size
+            def set(self, key, value):
+                cache[key] = value
+
+            def clear(self):
+                cache.clear()
+
+            self.get = types.MethodType(get, self)
+            self.set = types.MethodType(set, self)
+            self.clear = types.MethodType(clear, self)
+
+    if _OrderedDict is not None:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = _OrderedDict()
-        def setdefault(self, key, default):
-            return self._cache(key)
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
-        def clear(self):
-            self._cache.cache_clear()
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.popitem(False)
-        def stats(self):
-            return self._cache.cache_info()
+                def clear(self):
+                    cache.clear()
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
+    else:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = {}
+                key_fifo = collections.deque([], size)
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.pop(key_fifo.popleft(), None)
+                    key_fifo.append(key)
+
+                def clear(self):
+                    cache.clear()
+                    key_fifo.clear()
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
+    packrat_cache_lock = RLock()
+    packrat_cache_stats = [0, 0]
    # this method gets repeatedly called during backtracking with the same arguments -
    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
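All three cache classes added in the hunk above share one construction pattern: the actual storage exists only as closure variables inside __init__, and the get/set/clear closures are attached to each instance with types.MethodType, so no cache state is exposed as an ordinary attribute. A stripped-down sketch of that binding pattern, separate from the patch itself:

    import types

    class ClosureCache(object):
        # toy example of the closure-plus-MethodType style used by the caches above
        def __init__(self):
            cache = {}                                   # visible only to the closures below
            self.not_in_cache = not_in_cache = object()  # unique "missing" sentinel

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value

            # bind the closures as methods of this particular instance
            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)

    c = ClosureCache()
    c.set("k", 42)
    print(c.get("k"))                          # 42
    print(c.get("missing") is c.not_in_cache)  # True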
@@ -1179,29 +1226,24 @@ class ParserElement(object):
        HIT, MISS = 0, 1
        lookup = (self, instring, loc, callPreParse, doActions)
        with ParserElement.packrat_cache_lock:
-            container = ParserElement.packrat_cache.setdefault(lookup, [])
-            if container:
-                # retrieved non-empty container from cache, so this is a cache hit
-                # get values or exception from container
-                value = container[0]
-                ParserElement.packrat_cache_stats[HIT] += 1
-                if isinstance(value, Exception):
-                    raise value
-                return (value[0], value[1].copy())
-            else:
-                # retrieved container is empty, so this is a cache miss (first
-                # occurrence of this lookup key); must populate the container
+            cache = ParserElement.packrat_cache
+            value = cache.get(lookup)
+            if value is cache.not_in_cache:
                ParserElement.packrat_cache_stats[MISS] += 1
                try:
                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
-                    # saved parsed value into cached container
-                    container.append((value[0], value[1].copy()))
-                    return value
                except ParseBaseException as pe:
-                    # construct copy of exception for cacheing (omit traceback)
-                    exc_value = pe.__class__(*pe.args)
-                    container.append(exc_value)
+                    # cache a copy of the exception, without the traceback
+                    cache.set(lookup, pe.__class__(*pe.args))
                    raise
+                else:
+                    cache.set(lookup, (value[0], value[1].copy()))
+                    return value
+            else:
+                ParserElement.packrat_cache_stats[HIT] += 1
+                if isinstance(value, Exception):
+                    raise value
+                return (value[0], value[1].copy())
    _parse = _parseNoCache
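The reworked _parseCache keeps the HIT/MISS bookkeeping from the previous version, so cache effectiveness can still be inspected through ParserElement.packrat_cache_stats. A rough usage sketch (the counters are an internal detail, and the grammar below is just an arbitrary example):

    import pyparsing as pp

    pp.ParserElement.enablePackrat()   # installs _parseCache as ParserElement._parse

    word = pp.Word(pp.alphas)
    expr = pp.OneOrMore(pp.Group(word + pp.Optional(pp.Word(pp.nums))))
    expr.parseString("alpha 1 beta 2 gamma")

    hits, misses = pp.ParserElement.packrat_cache_stats   # [HIT, MISS] counters
    print("packrat cache: %d hits, %d misses" % (hits, misses))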
@@ -1236,10 +1278,13 @@ class ParserElement(object):
"""
if not ParserElement._packratEnabled:
ParserElement._packratEnabled = True
- if _lru_cache:
- ParserElement.packrat_cache = ParserElement._LruDictCache(cache_size_limit)
+ if cache_size_limit is None:
+ ParserElement.packrat_cache = ParserElement._UnboundedCache()
+ else:
+ ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
ParserElement._parse = ParserElement._parseCache
+
def parseString( self, instring, parseAll=False ):
"""Execute the parse expression with the given string.
This is the main interface to the client code, once the complete
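With this change, the cache_size_limit argument to enablePackrat() selects the cache implementation instead of being passed to lru_cache: an integer gives the bounded FIFO cache, None gives the unbounded one. A hypothetical call site:

    import pyparsing as pp

    # bounded FIFO cache (OrderedDict-backed where available, deque fallback otherwise)
    pp.ParserElement.enablePackrat(cache_size_limit=256)

    # or, for the unbounded dict-backed cache:
    # pp.ParserElement.enablePackrat(cache_size_limit=None)

    # note: per the guard in the hunk above, enablePackrat() is a no-op
    # once packratting has already been enabled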