summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES30
-rw-r--r--examples/test_bibparse.py8
-rw-r--r--pyparsing/__init__.py2
-rw-r--r--pyparsing/core.py41
-rw-r--r--tests/test_unit.py61
5 files changed, 102 insertions, 40 deletions
diff --git a/CHANGES b/CHANGES
index c777763..59189ca 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,9 +4,33 @@ Change Log
Version 3.0.2 -
---------------
-- Performance enhancement to `one_of` to always generate `regex`, even
- if `caseless` or `as_keyword` args are given as `True` (unless explicitly
- disabled by passing `use_regex=True`).
+- Reverted change in behavior with LineStart and StringStart, which changed the
+ interpretation of when and how LineStart and StringStart should match when
+ a line starts with spaces. In 3.0.0, the xxxStart expressions were not
+ really treated like expressions in their own right, but as modifiers to the
+ following expression when used like `LineStart() + expr`, so that if there
+ were whitespace on the line before `expr` (which would match in versions prior
+ to 3.0.0), the match would fail.
+
+ 3.0.0 implemented this by automatically promoting `LineStart() + expr` to
+ `AtLineStart(expr)`, which broke existing parsers that did not expect `expr` to
+ necessarily be right at the start of the line, but only be the first token
+ found on the line. This was reported as a regression in Issue #317.
+
+ In 3.0.2, pyparsing reverts to the previous behavior, but will retain the new
+ `AtLineStart` and `AtStringStart` expression classes, so that parsers can choose
+ whichever behavior applies in their specific instance. Specifically:
+
+ # matches expr if it is the first token on the line
+ # (allows for leading whitespace)
+ LineStart() + expr
+
+ # matches only if expr is found in column 1
+ AtLineStart(expr)
+
+- Performance enhancement to `one_of` to always generate an internal `Regex`,
+ even if `caseless` or `as_keyword` args are given as `True` (unless explicitly
+ disabled by passing `use_regex=False`).
Version 3.0.1 -
diff --git a/examples/test_bibparse.py b/examples/test_bibparse.py
index 9857ab4..b1a55c5 100644
--- a/examples/test_bibparse.py
+++ b/examples/test_bibparse.py
@@ -57,22 +57,22 @@ class TestBibparse(unittest.TestCase):
self.assertEqual(obj.parseString("{}").asList(), [])
self.assertEqual(obj.parseString('{a "string}')[0], 'a "string')
self.assertEqual(
- ["a ", ["nested"], "string"],
+ ["a ", ["nested"], " string"],
obj.parseString("{a {nested} string}").asList(),
)
self.assertEqual(
- ["a ", ["double ", ["nested"]], "string"],
+ ["a ", ["double ", ["nested"]], " string"],
obj.parseString("{a {double {nested}} string}").asList(),
)
for obj in (bp.quoted_string, bp.string, bp.field_value):
self.assertEqual([], obj.parseString('""').asList())
self.assertEqual("a string", obj.parseString('"a string"')[0])
self.assertEqual(
- ["a ", ["nested"], "string"],
+ ["a ", ["nested"], " string"],
obj.parseString('"a {nested} string"').asList(),
)
self.assertEqual(
- ["a ", ["double ", ["nested"]], "string"],
+ ["a ", ["double ", ["nested"]], " string"],
obj.parseString('"a {double {nested}} string"').asList(),
)
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index bf0fe81..a487736 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -105,7 +105,7 @@ __version__ = "{}.{}.{}".format(*__version_info__[:3]) + (
),
"",
)[__version_info__.release_level == "final"]
-__version_time__ = "26 October 2021 20:39 UTC"
+__version_time__ = "26 October 2021 23:54 UTC"
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/pyparsing/core.py b/pyparsing/core.py
index b1c194b..775c7b4 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -2009,6 +2009,8 @@ class ParserElement(ABC):
(Note that this is a raw string literal, you must include the leading ``'r'``.)
"""
+ from .testing import pyparsing_test
+
parseAll = parseAll and parse_all
fullDump = fullDump and full_dump
printResults = printResults and print_results
@@ -2030,11 +2032,14 @@ class ParserElement(ABC):
BOM = "\ufeff"
for t in tests:
if comment is not None and comment.matches(t, False) or comments and not t:
- comments.append(t)
+ comments.append(pyparsing_test.with_line_numbers(t))
continue
if not t:
continue
- out = ["\n" + "\n".join(comments) if comments else "", t]
+ out = [
+ "\n" + "\n".join(comments) if comments else "",
+ pyparsing_test.with_line_numbers(t),
+ ]
comments = []
try:
# convert newline marks to actual newlines, and strip leading BOM if present
@@ -2042,11 +2047,7 @@ class ParserElement(ABC):
result = self.parse_string(t, parse_all=parseAll)
except ParseBaseException as pe:
fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
- if "\n" in t:
- out.append(line(pe.loc, t))
- out.append(" " * (col(pe.loc, t) - 1) + "^" + fatal)
- else:
- out.append(" " * pe.loc + "^" + fatal)
+ out.append(pe.explain())
out.append("FAIL: " + str(pe))
success = success and failureTests
result = pe
@@ -3388,22 +3389,20 @@ class LineStart(_PositionToken):
def __init__(self):
super().__init__()
+ self.leave_whitespace()
+ self.orig_whiteChars = set() | self.whiteChars
+ self.whiteChars.discard("\n")
+ self.skipper = Empty().set_whitespace_chars(self.whiteChars)
self.errmsg = "Expected start of line"
- def __add__(self, other):
- return AtLineStart(other)
-
- def __sub__(self, other):
- return AtLineStart(other) - Empty()
-
def preParse(self, instring, loc):
if loc == 0:
return loc
else:
- if instring[loc : loc + 1] == "\n" and "\n" in self.whiteChars:
- ret = loc + 1
- else:
- ret = super().preParse(instring, loc)
+ ret = self.skipper.preParse(instring, loc)
+ if "\n" in self.orig_whiteChars:
+ while instring[ret : ret + 1] == "\n":
+ ret = self.skipper.preParse(instring, ret + 1)
return ret
def parseImpl(self, instring, loc, doActions=True):
@@ -3444,12 +3443,6 @@ class StringStart(_PositionToken):
super().__init__()
self.errmsg = "Expected start of text"
- def __add__(self, other):
- return AtStringStart(other)
-
- def __sub__(self, other):
- return AtStringStart(other) - Empty()
-
def parseImpl(self, instring, loc, doActions=True):
if loc != 0:
# see if entire string up to here is just whitespace and ignoreables
@@ -3835,6 +3828,7 @@ class Or(ParseExpression):
super().__init__(exprs, savelist)
if self.exprs:
self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+ self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
else:
self.mayReturnEmpty = True
@@ -3976,6 +3970,7 @@ class MatchFirst(ParseExpression):
if self.exprs:
self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
self.callPreparse = all(e.callPreparse for e in self.exprs)
+ self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
else:
self.mayReturnEmpty = True
diff --git a/tests/test_unit.py b/tests/test_unit.py
index fe4253d..a5c8801 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -3587,14 +3587,14 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
"""
test = dedent(test)
- print(test)
+ print(pp.testing.with_line_numbers(test))
print("normal parsing")
for t, s, e in (pp.LineStart() + "AAA").scanString(test):
- print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s]))
+ print(s, e, pp.lineno(s, test), pp.line(s, test), repr(t))
print()
self.assertEqual(
- "A", test[s], "failed LineStart with insignificant newlines"
+ "A", t[0][0], "failed LineStart with insignificant newlines"
)
print(r"parsing without \n in whitespace chars")
@@ -3604,10 +3604,10 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s]))
print()
self.assertEqual(
- "A", test[s], "failed LineStart with insignificant newlines"
+ "A", t[0][0], "failed LineStart with insignificant newlines"
)
- def testLineStart3(self):
+ def testLineStartWithLeadingSpaces(self):
# testing issue #272
instring = dedent(
"""
@@ -3634,16 +3634,21 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
alpha_line | pp.Word("_"),
alpha_line | alpha_line,
pp.MatchFirst([alpha_line, alpha_line]),
+ alpha_line ^ pp.Word("_"),
+ alpha_line ^ alpha_line,
+ pp.Or([alpha_line, pp.Word("_")]),
pp.LineStart() + pp.Word(pp.alphas) + pp.LineEnd().suppress(),
pp.And([pp.LineStart(), pp.Word(pp.alphas), pp.LineEnd().suppress()]),
]
+ fails = []
for test in tests:
print(test.searchString(instring))
- self.assertEqual(
- ["a", "d", "e"], flatten(sum(test.search_string(instring)).as_list())
- )
+ if ['a', 'b', 'c', 'd', 'e', 'f', 'g'] != flatten(sum(test.search_string(instring)).as_list()):
+ fails.append(test)
+ if fails:
+ self.fail("failed LineStart tests:\n{}".format("\n".join(str(expr) for expr in fails)))
- def testLineStart4(self):
+ def testAtLineStart(self):
test = dedent(
"""\
AAA this line
@@ -3663,6 +3668,10 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
)
def testStringStart(self):
+ self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), "123", ["123"])
+ self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), " 123", ["123"])
+ self.assertParseAndCheckList(pp.StringStart() + "123", "123", ["123"])
+ self.assertParseAndCheckList(pp.StringStart() + "123", " 123", ["123"])
self.assertParseAndCheckList(pp.AtStringStart(pp.Word(pp.nums)), "123", ["123"])
self.assertParseAndCheckList(pp.AtStringStart("123"), "123", ["123"])
@@ -3673,6 +3682,40 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
with self.assertRaisesParseException():
pp.AtStringStart("123").parse_string(" 123")
+ def testStringStartAndLineStartInsideAnd(self):
+ P_MTARG = (
+ pp.StringStart()
+ + pp.Word("abcde")
+ + pp.StringEnd()
+ )
+
+ P_MTARG2 = (
+ pp.LineStart()
+ + pp.Word("abcde")
+ + pp.StringEnd()
+ )
+
+ P_MTARG3 = (
+ pp.AtLineStart(pp.Word("abcde"))
+ + pp.StringEnd()
+ )
+
+ def test(expr, string):
+ expr.streamline()
+ print(expr, repr(string), end=" ")
+ print(expr.parse_string(string))
+
+ test(P_MTARG, "aaa")
+ test(P_MTARG2, "aaa")
+ test(P_MTARG2, "\naaa")
+ test(P_MTARG2, " aaa")
+ test(P_MTARG2, "\n aaa")
+
+ with self.assertRaisesParseException():
+ test(P_MTARG3, " aaa")
+ with self.assertRaisesParseException():
+ test(P_MTARG3, "\n aaa")
+
def testLineAndStringEnd(self):
NLs = pp.OneOrMore(pp.lineEnd)