Added CloseMatch class

git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@425 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
author: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-17 23:14:44 +0000
committer: ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-08-17 23:14:44 +0000
commit: f1fc1015646028bee1a8c5052e85def25847324f (patch)
tree: 0eb56489ce6da5f44cde7eed8f0cd032498d6959
parent: 57c49d7efcc51c0d10788e2c669fef3f4f057db7 (diff)
download: pyparsing-f1fc1015646028bee1a8c5052e85def25847324f.tar.gz
3 files changed, 99 insertions, 3 deletions
diff --git a/src/CHANGES b/src/CHANGES
index c2156cc..40c08ff 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -2,6 +2,13 @@
 Change Log
 ==========
 
+Version 2.1.9 - 
+------------------------------
+- Added class CloseMatch, a variation on Literal which matches 
+  "close" matches, that is, strings with at most 'n' mismatching
+  characters (where 'n' is set with the maxMismatches argument).
+
+
 Version 2.1.8 - 
 ------------------------------
 - Fixed issue in the optimization to _trim_arity, when the full 
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 5b63a73..269e789 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -61,7 +61,7 @@ The pyparsing module handles some of the problems that are typically vexing when
 """
 
 __version__ = "2.1.9"
-__versionTime__ = "15 Aug 2016 18:14 UTC"
+__versionTime__ = "17 Aug 2016 23:06 UTC"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
 import string
@@ -110,7 +110,7 @@ __all__ = [
 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
-'tokenMap', 'pyparsing_common',
+'CloseMatch', 'tokenMap', 'pyparsing_common',
 ]
 
 system_version = tuple(sys.version_info)[:3]
@@ -294,7 +294,7 @@ class _ParseResultsWithOffset(object):
     def __getitem__(self,i):
         return self.tup[i]
     def __repr__(self):
-        return repr(self.tup)
+        return repr(self.tup[0])  # show only the first element; tup[1] is the offset stored by setOffset
     def setOffset(self,i):
         self.tup = (self.tup[0],i)
 
@@ -2491,6 +2491,67 @@ class CaselessKeyword(Keyword):
             return loc+self.matchLen, self.match
         raise ParseException(instring, loc, self.errmsg, self)
 
+class CloseMatch(Token):
+    """
+    A variation on L{Literal} which matches "close" matches, that is,
+    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
+     - C{match_string} - string to be matched
+     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
+
+    The results from a successful parse will contain the matched text from the input string and the following named results:
+     - C{mismatches} - a list of the positions within the match_string where mismatches were found
+     - C{original} - the original match_string used to compare against the input string
+
+    If C{mismatches} is an empty list, then the match was an exact match. Characters are compared
+    position by position, so inserted or deleted characters are not recognized as a close match.
+
+    Example::
+        patt = CloseMatch("ATCATCGAATGGA")
+        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
+        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
+        # exact match
+        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
+
+        # close match allowing up to 2 mismatches
+        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
+        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
+    """
+    def __init__(self, match_string, maxMismatches=1):
+        super(CloseMatch,self).__init__()
+        self.name = match_string
+        self.match_string = match_string
+        self.maxMismatches = maxMismatches
+        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
+        self.mayIndexError = False  # parseImpl checks the remaining input length before slicing
+        self.mayReturnEmpty = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        """Match C{match_string} at C{loc}; return (end loc, ParseResults) or raise ParseException."""
+        start = loc
+        match_string = self.match_string
+        maxMismatches = self.maxMismatches
+        maxloc = start + len(match_string)
+
+        # a candidate must be exactly as long as match_string, so fail if too little input remains
+        if maxloc <= len(instring):
+            mismatches = []
+            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
+                if src != mat:
+                    mismatches.append(match_stringloc)
+                    # stop scanning as soon as the mismatch budget is exceeded
+                    if len(mismatches) > maxMismatches:
+                        break
+            else:
+                # loop ran to completion: the match ends at maxloc, which is relative to start, not 0
+                loc = maxloc
+                results = ParseResults([instring[start:loc]])
+                results['original'] = match_string
+                results['mismatches'] = mismatches
+                return loc, results
+
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
 class Word(Token):
     """
     Token for matching words composed of allowed character sets.
diff --git a/src/unitTests.py b/src/unitTests.py
index b80a9f4..6a46154 100644
--- a/src/unitTests.py
+++ b/src/unitTests.py
@@ -3100,7 +3100,35 @@ class InlineLiteralsUsingTest(ParseTestCase):
             result = date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
             assert result.asList() == ['1999', '12', '31'], "inlineLiteralsUsing(example 2) failed!"
 
+class CloseMatchTest(ParseTestCase):
+    """Check CloseMatch mismatch positions, and rejection of inputs that are too different or too short."""
+    def runTest(self):
+        import pyparsing as pp
+        searchseq = pp.CloseMatch("ATCATCGAATGGA", 2)
 
+        _, results = searchseq.runTests("""
+            ATCATCGAATGGA
+            XTCATCGAATGGX
+            ATCATCGAAXGGA
+            ATCAXXGAATGGA
+            ATCAXXGAATGXA
+            ATCAXXGAATGG
+            """)
+        # expected mismatch positions per test line; None marks an input that must fail to parse
+        expected = (
+            [],
+            [0,12],
+            [9],
+            [4,5],
+            None,
+            None
+            )
+        for r,exp in zip(results, expected):
+            failed = isinstance(r[1], Exception)
+            # a result is correct if it failed when expected to, or reports exactly the expected positions
+            ok = failed if exp is None else (not failed and r[1].mismatches == exp)
+            assert ok, "fail CloseMatch between %r and %r" % (searchseq.match_string, r[0])
+            print(r[0], 'exc: %s' % r[1] if failed else "match")
 class MiscellaneousParserTests(ParseTestCase):
     def runTest(self):
         import pyparsing
author	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-08-17 23:14:44 +0000
committer	ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b>	2016-08-17 23:14:44 +0000
commit	f1fc1015646028bee1a8c5052e85def25847324f (patch)
tree	0eb56489ce6da5f44cde7eed8f0cd032498d6959
parent	57c49d7efcc51c0d10788e2c669fef3f4f057db7 (diff)
download	pyparsing-f1fc1015646028bee1a8c5052e85def25847324f.tar.gz