summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-06-13 20:24:24 +0000
committer ptmcg <ptmcg@9bf210a0-9d2d-494c-87cf-cfb32e7dff7b> 2016-06-13 20:24:24 +0000
commit 191c56031a16a6a7910613fa2be77a670d225996 (patch)
tree a3d34868cf4a29037f012362869144457dfdf611
parent ec812fbba1eba0eca156b7d740a367867a9102f7 (diff)
download pyparsing-191c56031a16a6a7910613fa2be77a670d225996.tar.gz
Added ParserElement.split() generator method
Also fixed minor blip in originalTextFor, to inherit ignored exprs from the embedded expression.

git-svn-id: svn://svn.code.sf.net/p/pyparsing/code/trunk@369 9bf210a0-9d2d-494c-87cf-cfb32e7dff7b
-rw-r--r-- src/CHANGES 5
-rw-r--r-- src/pyparsing.py 18
-rw-r--r-- src/unitTests.py 96
3 files changed, 118 insertions, 1 deletion
diff --git a/src/CHANGES b/src/CHANGES
index 10c5593..799cdf8 100644
--- a/src/CHANGES
+++ b/src/CHANGES
@@ -4,6 +4,11 @@ Change Log
Verison 2.1.5 - June, 2016
------------------------------
+- Added ParserElement.split() generator method, similar to re.split().
+ Includes optional arguments maxsplit (to limit the number of splits),
+ and includeSeparators (to include the separating matched text in the
+ returned output, default=False).
+
- Added a new parse action construction helper tokenMap, which will
apply a function and optional arguments to each element in a
ParseResults. So this parse action:
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 5fb3fbc..ec678d0 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -58,7 +58,7 @@ The pyparsing module handles some of the problems that are typically vexing when
"""
__version__ = "2.1.5"
-__versionTime__ = "12 Jun 2016 21:53 UTC"
+__versionTime__ = "13 Jun 2016 19:59 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -1326,6 +1326,21 @@ class ParserElement(object):
# catch and re-raise exception from here, clears out pyparsing internal stack trace
raise exc
+ def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
+ """Generator method to split a string using the given expression as a separator.
+ May be called with optional C{maxsplit} argument, to limit the number of splits;
+ and the optional C{includeSeparators} argument (default=C{False}), if the separating
+ matching text should be included in the split results.
+ """
+ splits = 0
+ last = 0
+ for t,s,e in self.scanString(instring, maxMatches=maxsplit):
+ yield instring[last:s]
+ if includeSeparators:
+ yield t[0]
+ last = e
+ yield instring[last:]
+
def __add__(self, other ):
"""Implementation of + operator - returns C{L{And}}"""
if isinstance( other, basestring ):
@@ -3525,6 +3540,7 @@ def originalTextFor(expr, asString=True):
def extractText(s,l,t):
t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
matchExpr.setParseAction(extractText)
+ matchExpr.ignoreExprs = expr.ignoreExprs
return matchExpr
def ungroup(expr):
diff --git a/src/unitTests.py b/src/unitTests.py
index 9987a58..90cbc2a 100644
--- a/src/unitTests.py
+++ b/src/unitTests.py
@@ -2822,6 +2822,102 @@ class HTMLStripperTest(ParseTestCase):
result = read_everything.parseString(sample)
assert result[0].strip() == 'Here is some sample HTML text.'
+class ExprSplitterTest(ParseTestCase):
+ def runTest(self):
+
+ from pyparsing import Literal, quotedString, pythonStyleComment, Empty
+
+ expr = Literal(';') + Empty()
+ expr.ignore(quotedString)
+ expr.ignore(pythonStyleComment)
+
+
+ sample = """
+ def main():
+ this_semi_does_nothing();
+ neither_does_this_but_there_are_spaces_afterward();
+ a = "a;b"; return a # this is a comment; it has a semicolon!
+
+ def b():
+ if False:
+ z=1000;b("; in quotes"); c=200;return z
+ return ';'
+
+ class Foo(object):
+ def bar(self):
+ '''a docstring; with a semicolon'''
+ a = 10; b = 11; c = 12
+
+ # this comment; has several; semicolons
+ if self.spam:
+ x = 12; return x # so; does; this; one
+ x = 15;;; y += x; return y
+
+ def baz(self):
+ return self.bar
+ """
+ expected = [
+ [' this_semi_does_nothing()', ''],
+ [' neither_does_this_but_there_are_spaces_afterward()', ''],
+ [' a = "a;b"', 'return a # this is a comment; it has a semicolon!'],
+ [' z=1000', 'b("; in quotes")', 'c=200', 'return z'],
+ [" return ';'"],
+ [" '''a docstring; with a semicolon'''"],
+ [' a = 10', 'b = 11', 'c = 12'],
+ [' # this comment; has several; semicolons'],
+ [' x = 12', 'return x # so; does; this; one'],
+ [' x = 15', '', '', 'y += x', 'return y'],
+ ]
+
+ exp_iter = iter(expected)
+ for line in filter(lambda ll: ';' in ll, sample.splitlines()):
+ print_(str(list(expr.split(line)))+',')
+ assert list(expr.split(line)) == next(exp_iter), "invalid split on expression"
+
+ print_()
+
+ expected = [
+ [' this_semi_does_nothing()', ';', ''],
+ [' neither_does_this_but_there_are_spaces_afterward()', ';', ''],
+ [' a = "a;b"', ';', 'return a # this is a comment; it has a semicolon!'],
+ [' z=1000', ';', 'b("; in quotes")', ';', 'c=200', ';', 'return z'],
+ [" return ';'"],
+ [" '''a docstring; with a semicolon'''"],
+ [' a = 10', ';', 'b = 11', ';', 'c = 12'],
+ [' # this comment; has several; semicolons'],
+ [' x = 12', ';', 'return x # so; does; this; one'],
+ [' x = 15', ';', '', ';', '', ';', 'y += x', ';', 'return y'],
+ ]
+ exp_iter = iter(expected)
+ for line in filter(lambda ll: ';' in ll, sample.splitlines()):
+ print_(str(list(expr.split(line, includeSeparators=True)))+',')
+ assert list(expr.split(line, includeSeparators=True)) == next(exp_iter), "invalid split on expression"
+
+ print_()
+
+
+ expected = [
+ [' this_semi_does_nothing()', ''],
+ [' neither_does_this_but_there_are_spaces_afterward()', ''],
+ [' a = "a;b"', 'return a # this is a comment; it has a semicolon!'],
+ [' z=1000', 'b("; in quotes"); c=200;return z'],
+ [' a = 10', 'b = 11; c = 12'],
+ [' x = 12', 'return x # so; does; this; one'],
+ [' x = 15', ';; y += x; return y'],
+ ]
+ exp_iter = iter(expected)
+ for line in sample.splitlines():
+ pieces = list(expr.split(line, maxsplit=1))
+ print_(str(pieces)+',')
+ if len(pieces) == 2:
+ exp = next(exp_iter)
+ assert pieces == exp, "invalid split on expression with maxSplits=1"
+ elif len(pieces) == 1:
+ assert len(expr.searchString(line)) == 0, "invalid split with maxSplits=1 when expr not present"
+ else:
+ print_("\n>>> " + line)
+ assert False, "invalid split on expression with maxSplits=1, corner case"
+
class MiscellaneousParserTests(ParseTestCase):
def runTest(self):