diff options
-rw-r--r-- | CHANGES | 30 | ||||
-rw-r--r-- | examples/test_bibparse.py | 8 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/core.py | 41 | ||||
-rw-r--r-- | tests/test_unit.py | 61 |
5 files changed, 102 insertions, 40 deletions
@@ -4,9 +4,33 @@ Change Log Version 3.0.2 - --------------- -- Performance enhancement to `one_of` to always generate `regex`, even - if `caseless` or `as_keyword` args are given as `True` (unless explicitly - disabled by passing `use_regex=True`). +- Reverted change in behavior with LineStart and StringStart, which changed the + interpretation of when and how LineStart and StringStart should match when + a line starts with spaces. In 3.0.0, the xxxStart expressions were not + really treated like expressions in their own right, but as modifiers to the + following expression when used like `LineStart() + expr`, so that if there + were whitespace on the line before `expr` (which would match in versions prior + to 3.0.0), the match would fail. + + 3.0.0 implemented this by automatically promoting `LineStart() + expr` to + `AtLineStart(expr)`, which broke existing parsers that did not expect `expr` to + necessarily be right at the start of the line, but only be the first token + found on the line. This was reported as a regression in Issue #317. + + In 3.0.2, pyparsing reverts to the previous behavior, but will retain the new + `AtLineStart` and `AtStringStart` expression classes, so that parsers can choose + whichever behavior applies in their specific instance. Specifically: + + # matches expr if it is the first token on the line + # (allows for leading whitespace) + LineStart() + expr + + # matches only if expr is found in column 1 + AtLineStart(expr) + +- Performance enhancement to `one_of` to always generate an internal `Regex`, + even if `caseless` or `as_keyword` args are given as `True` (unless explicitly + disabled by passing `use_regex=False`). 
Version 3.0.1 - diff --git a/examples/test_bibparse.py b/examples/test_bibparse.py index 9857ab4..b1a55c5 100644 --- a/examples/test_bibparse.py +++ b/examples/test_bibparse.py @@ -57,22 +57,22 @@ class TestBibparse(unittest.TestCase): self.assertEqual(obj.parseString("{}").asList(), []) self.assertEqual(obj.parseString('{a "string}')[0], 'a "string') self.assertEqual( - ["a ", ["nested"], "string"], + ["a ", ["nested"], " string"], obj.parseString("{a {nested} string}").asList(), ) self.assertEqual( - ["a ", ["double ", ["nested"]], "string"], + ["a ", ["double ", ["nested"]], " string"], obj.parseString("{a {double {nested}} string}").asList(), ) for obj in (bp.quoted_string, bp.string, bp.field_value): self.assertEqual([], obj.parseString('""').asList()) self.assertEqual("a string", obj.parseString('"a string"')[0]) self.assertEqual( - ["a ", ["nested"], "string"], + ["a ", ["nested"], " string"], obj.parseString('"a {nested} string"').asList(), ) self.assertEqual( - ["a ", ["double ", ["nested"]], "string"], + ["a ", ["double ", ["nested"]], " string"], obj.parseString('"a {double {nested}} string"').asList(), ) diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index bf0fe81..a487736 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -105,7 +105,7 @@ __version__ = "{}.{}.{}".format(*__version_info__[:3]) + ( ), "", )[__version_info__.release_level == "final"] -__version_time__ = "26 October 2021 20:39 UTC" +__version_time__ = "26 October 2021 23:54 UTC" __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/pyparsing/core.py b/pyparsing/core.py index b1c194b..775c7b4 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -2009,6 +2009,8 @@ class ParserElement(ABC): (Note that this is a raw string literal, you must include the leading ``'r'``.) 
""" + from .testing import pyparsing_test + parseAll = parseAll and parse_all fullDump = fullDump and full_dump printResults = printResults and print_results @@ -2030,11 +2032,14 @@ class ParserElement(ABC): BOM = "\ufeff" for t in tests: if comment is not None and comment.matches(t, False) or comments and not t: - comments.append(t) + comments.append(pyparsing_test.with_line_numbers(t)) continue if not t: continue - out = ["\n" + "\n".join(comments) if comments else "", t] + out = [ + "\n" + "\n".join(comments) if comments else "", + pyparsing_test.with_line_numbers(t), + ] comments = [] try: # convert newline marks to actual newlines, and strip leading BOM if present @@ -2042,11 +2047,7 @@ class ParserElement(ABC): result = self.parse_string(t, parse_all=parseAll) except ParseBaseException as pe: fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" - if "\n" in t: - out.append(line(pe.loc, t)) - out.append(" " * (col(pe.loc, t) - 1) + "^" + fatal) - else: - out.append(" " * pe.loc + "^" + fatal) + out.append(pe.explain()) out.append("FAIL: " + str(pe)) success = success and failureTests result = pe @@ -3388,22 +3389,20 @@ class LineStart(_PositionToken): def __init__(self): super().__init__() + self.leave_whitespace() + self.orig_whiteChars = set() | self.whiteChars + self.whiteChars.discard("\n") + self.skipper = Empty().set_whitespace_chars(self.whiteChars) self.errmsg = "Expected start of line" - def __add__(self, other): - return AtLineStart(other) - - def __sub__(self, other): - return AtLineStart(other) - Empty() - def preParse(self, instring, loc): if loc == 0: return loc else: - if instring[loc : loc + 1] == "\n" and "\n" in self.whiteChars: - ret = loc + 1 - else: - ret = super().preParse(instring, loc) + ret = self.skipper.preParse(instring, loc) + if "\n" in self.orig_whiteChars: + while instring[ret : ret + 1] == "\n": + ret = self.skipper.preParse(instring, ret + 1) return ret def parseImpl(self, instring, loc, doActions=True): @@ 
-3444,12 +3443,6 @@ class StringStart(_PositionToken): super().__init__() self.errmsg = "Expected start of text" - def __add__(self, other): - return AtStringStart(other) - - def __sub__(self, other): - return AtStringStart(other) - Empty() - def parseImpl(self, instring, loc, doActions=True): if loc != 0: # see if entire string up to here is just whitespace and ignoreables @@ -3835,6 +3828,7 @@ class Or(ParseExpression): super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) else: self.mayReturnEmpty = True @@ -3976,6 +3970,7 @@ class MatchFirst(ParseExpression): if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) self.callPreparse = all(e.callPreparse for e in self.exprs) + self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) else: self.mayReturnEmpty = True diff --git a/tests/test_unit.py b/tests/test_unit.py index fe4253d..a5c8801 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -3587,14 +3587,14 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): """ test = dedent(test) - print(test) + print(pp.testing.with_line_numbers(test)) print("normal parsing") for t, s, e in (pp.LineStart() + "AAA").scanString(test): - print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s])) + print(s, e, pp.lineno(s, test), pp.line(s, test), repr(t)) print() self.assertEqual( - "A", test[s], "failed LineStart with insignificant newlines" + "A", t[0][0], "failed LineStart with insignificant newlines" ) print(r"parsing without \n in whitespace chars") @@ -3604,10 +3604,10 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s])) print() self.assertEqual( - "A", test[s], "failed LineStart with insignificant newlines" + "A", t[0][0], "failed LineStart with insignificant newlines" ) - def testLineStart3(self): + def 
testLineStartWithLeadingSpaces(self): # testing issue #272 instring = dedent( """ @@ -3634,16 +3634,21 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): alpha_line | pp.Word("_"), alpha_line | alpha_line, pp.MatchFirst([alpha_line, alpha_line]), + alpha_line ^ pp.Word("_"), + alpha_line ^ alpha_line, + pp.Or([alpha_line, pp.Word("_")]), pp.LineStart() + pp.Word(pp.alphas) + pp.LineEnd().suppress(), pp.And([pp.LineStart(), pp.Word(pp.alphas), pp.LineEnd().suppress()]), ] + fails = [] for test in tests: print(test.searchString(instring)) - self.assertEqual( - ["a", "d", "e"], flatten(sum(test.search_string(instring)).as_list()) - ) + if ['a', 'b', 'c', 'd', 'e', 'f', 'g'] != flatten(sum(test.search_string(instring)).as_list()): + fails.append(test) + if fails: + self.fail("failed LineStart tests:\n{}".format("\n".join(str(expr) for expr in fails))) - def testLineStart4(self): + def testAtLineStart(self): test = dedent( """\ AAA this line @@ -3663,6 +3668,10 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): ) def testStringStart(self): + self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), "123", ["123"]) + self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), " 123", ["123"]) + self.assertParseAndCheckList(pp.StringStart() + "123", "123", ["123"]) + self.assertParseAndCheckList(pp.StringStart() + "123", " 123", ["123"]) self.assertParseAndCheckList(pp.AtStringStart(pp.Word(pp.nums)), "123", ["123"]) self.assertParseAndCheckList(pp.AtStringStart("123"), "123", ["123"]) @@ -3673,6 +3682,40 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): with self.assertRaisesParseException(): pp.AtStringStart("123").parse_string(" 123") + def testStringStartAndLineStartInsideAnd(self): + P_MTARG = ( + pp.StringStart() + + pp.Word("abcde") + + pp.StringEnd() + ) + + P_MTARG2 = ( + pp.LineStart() + + pp.Word("abcde") + + pp.StringEnd() + ) + + P_MTARG3 = ( + pp.AtLineStart(pp.Word("abcde")) + + 
pp.StringEnd() + ) + + def test(expr, string): + expr.streamline() + print(expr, repr(string), end=" ") + print(expr.parse_string(string)) + + test(P_MTARG, "aaa") + test(P_MTARG2, "aaa") + test(P_MTARG2, "\naaa") + test(P_MTARG2, " aaa") + test(P_MTARG2, "\n aaa") + + with self.assertRaisesParseException(): + test(P_MTARG3, " aaa") + with self.assertRaisesParseException(): + test(P_MTARG3, "\n aaa") + def testLineAndStringEnd(self): NLs = pp.OneOrMore(pp.lineEnd) |