summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2016-08-17 23:14:44 +0000
committerptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>2016-08-17 23:14:44 +0000
commitf1fc1015646028bee1a8c5052e85def25847324f (patch)
tree0eb56489ce6da5f44cde7eed8f0cd032498d6959
parent57c49d7efcc51c0d10788e2c669fef3f4f057db7 (diff)
downloadpyparsing-f1fc1015646028bee1a8c5052e85def25847324f.tar.gz
Added CloseMatch class
git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@425 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r--src/CHANGES7
-rw-r--r--src/pyparsing.py67
-rw-r--r--src/unitTests.py28
3 files changed, 99 insertions, 3 deletions
diff --git a/src/CHANGES b/src/CHANGES
index c2156cc..40c08ff 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -2,6 +2,13 @@
Change Log
==========
+Version 2.1.9 -
+------------------------------
+- Added class CloseMatch, a variation on Literal which accepts
+  "close" matches, that is, strings with at most 'n' mismatching
+  characters.
+
+
Version 2.1.8 -
------------------------------
- Fixed issue in the optimization to _trim_arity, when the full
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 5b63a73..269e789 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -61,7 +61,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.9"
-__versionTime__ = "15 Aug 2016 18:14 UTC"
+__versionTime__ = "17 Aug 2016 23:06 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -110,7 +110,7 @@ __all__ = [
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
-'tokenMap', 'pyparsing_common',
+'CloseMatch', 'tokenMap', 'pyparsing_common',
]
system_version = tuple(sys.version_info)[:3]
@@ -294,7 +294,7 @@ class _ParseResultsWithOffset(object):
def __getitem__(self,i):
return self.tup[i]
def __repr__(self):
- return repr(self.tup)
+ return repr(self.tup[0])
def setOffset(self,i):
self.tup = (self.tup[0],i)
@@ -2491,6 +2491,67 @@ class CaselessKeyword(Keyword):
return loc+self.matchLen, self.match
raise ParseException(instring, loc, self.errmsg, self)
+class CloseMatch(Token):
+    """
+    A variation on L{Literal} which matches "close" matches, that is,
+    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
+     - C{match_string} - string to be matched
+     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
+
+    The results from a successful parse will contain the matched text from the input string and the following named results:
+     - C{mismatches} - a list of the positions within the match_string where mismatches were found
+     - C{original} - the original match_string used to compare against the input string
+
+    If C{mismatches} is an empty list, then the match was an exact match.
+
+    Example::
+        patt = CloseMatch("ATCATCGAATGGA")
+        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
+        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
+
+        # exact match
+        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
+
+        # close match allowing up to 2 mismatches
+        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
+        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
+    """
+    def __init__(self, match_string, maxMismatches=1):
+        super(CloseMatch,self).__init__()
+        self.name = match_string
+        self.match_string = match_string
+        self.maxMismatches = maxMismatches
+        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
+        self.mayIndexError = False
+        self.mayReturnEmpty = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Compare C{instring} at C{loc} with C{match_string}; return C{(end_loc, results)} or raise C{ParseException}."""
+        start = loc
+        maxloc = start + len(self.match_string)
+
+        # only compare when enough input remains to hold a full-length candidate
+        if maxloc <= len(instring):
+            mismatches = []
+            maxMismatches = self.maxMismatches
+
+            for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
+                src,mat = s_m
+                if src != mat:
+                    mismatches.append(match_stringloc)
+                    if len(mismatches) > maxMismatches:
+                        break
+            else:
+                # every position was compared: end at the absolute offset maxloc, not the match_string-relative index
+                loc = maxloc
+                results = ParseResults([instring[start:loc]])
+                results['original'] = self.match_string
+                results['mismatches'] = mismatches
+                return loc, results
+
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
class Word(Token):
"""
Token for matching words composed of allowed character sets.
diff --git a/src/unitTests.py b/src/unitTests.py
index b80a9f4..6a46154 100644
--- a/src/unitTests.py
+++ b/src/unitTests.py
@@ -3100,7 +3100,35 @@ class InlineLiteralsUsingTest(ParseTestCase):
result = date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
assert result.asList() == ['1999', '12', '31'], "inlineLiteralsUsing(example 2) failed!"
+class CloseMatchTest(ParseTestCase):
+    """Checks the mismatch positions CloseMatch reports for a batch of inputs run through runTests."""
+    def runTest(self):
+        import pyparsing as pp
+
+        searchseq = pp.CloseMatch("ATCATCGAATGGA", 2)
+        _, results = searchseq.runTests("""
+            ATCATCGAATGGA
+            XTCATCGAATGGX
+            ATCATCGAAXGGA
+            ATCAXXGAATGGA
+            ATCAXXGAATGXA
+            ATCAXXGAATGG
+            """)
+        # expected mismatch positions, one entry per input above; None = no mismatch list is checked for that input
+        expected = (
+            [], [0,12], [9], [4,5],
+            None, None,
+            )
+
+        for r,exp in zip(results, expected):
+            if exp is not None:
+                # the failure message reads match_string, the attribute CloseMatch.__init__ actually sets
+                assert r[1].mismatches == exp, "fail CloseMatch between %r and %r" % (searchseq.match_string, r[0])
+            # echo the input with its exception text, or whether the reported mismatches equal exp
+            print(r[0], 'exc: %s' % r[1] if exp is None and isinstance(r[1], Exception)
+                    else ("no match", "match")[r[1].mismatches == exp])
+
class MiscellaneousParserTests(ParseTestCase):
def runTest(self):
import pyparsing