diff options
author | ptmcg <ptmcg@austin.rr.com> | 2022-06-29 02:12:10 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2022-06-29 02:12:10 -0500 |
commit | 4cd691f3c3e342f842629a1328a9d12f10af4755 (patch) | |
tree | 9db9583a93fa3c70596bf59d9d5d7444f805a350 | |
parent | cb6858cced83bde0de8d497e3a0f2e39ce9edf59 (diff) | |
download | pyparsing-git-4cd691f3c3e342f842629a1328a9d12f10af4755.tar.gz |
Added python_quoted_string; fixed exception messages for ParseElementEnhance subclasses
-rw-r--r-- | CHANGES | 9 | ||||
-rw-r--r-- | docs/HowToUsePyparsing.rst | 4 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/core.py | 29 | ||||
-rw-r--r-- | pyparsing/exceptions.py | 4 | ||||
-rw-r--r-- | tests/test_diagram.py | 4 | ||||
-rw-r--r-- | tests/test_unit.py | 255 |
7 files changed, 193 insertions, 114 deletions
@@ -28,6 +28,10 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly. +- Added new builtin `python_quoted_string`, which will match any form + of single-line or multiline quoted strings defined in Python. (Inspired + by discussion with Andreas Schörgenhumer in Issue #421.) + - Fixed bug in `Word` when `max=2`. Also added performance enhancement when specifying `exact` argument. Reported in issue #409 by panda-34, nice catch! @@ -35,7 +39,7 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit - `Word` arguments are now validated if `min` and `max` are both given, that `min` <= `max`; raises `ValueError` if values are invalid. -- Extended `expr[]` notation for repetition of expr to accept a +- Extended `expr[]` notation for repetition of `expr` to accept a slice, where the slice's stop value indicates a `stop_on` expression: @@ -62,6 +66,9 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit - Fixed bug in srange, when parsing escaped '/' and '\' inside a range set. +- Fixed exception messages for some ParserElements with custom names, + which instead showed their contained expression names. + - Multiple added and corrected type annotations. With much help from Stephen Rosen, thanks! diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index 454dc6d..8301857 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -6,7 +6,7 @@ Using the pyparsing module :address: ptmcg.pm+pyparsing@gmail.com :revision: 3.0.10 -:date: May, 2022 +:date: July, 2022 :copyright: Copyright |copy| 2003-2022 Paul McGuire. @@ -1308,6 +1308,8 @@ Common string and token constants - ``quoted_string`` - ``sgl_quoted_string | dbl_quoted_string`` +- ``python_quoted_string`` - ``quoted_string | multiline quoted string`` + - ``c_style_comment`` - a comment block delimited by ``'/*'`` and ``'*/'`` sequences; can span multiple lines, but does not support nesting of comments diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 368c5f7..7970791 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ class version_info(NamedTuple): __version_info__ = version_info(3, 0, 10, "final", 0) -__version_time__ = "24 Jun 2022 16:29 UTC" +__version_time__ = "29 Jun 2022 06:57 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/pyparsing/core.py b/pyparsing/core.py index 11f7368..3a332e9 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -4446,7 +4446,11 @@ class ParseElementEnhance(ParserElement): def parseImpl(self, instring, loc, doActions=True): if self.expr is not None: - return self.expr._parse(instring, loc, doActions, callPreParse=False) + try: + return self.expr._parse(instring, loc, doActions, callPreParse=False) + except ParseBaseException as pbe: + pbe.msg = self.errmsg + raise else: raise ParseException(instring, loc, "No expression defined", self) @@ -5870,10 +5874,29 @@ sgl_quoted_string = Combine( ).set_name("string enclosed in single quotes") quoted_string = Combine( - Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' - | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" + (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name( + "double quoted string" + ) + | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name( + "single quoted string" + ) ).set_name("quoted string using single or double quotes") +python_quoted_string = Combine( + (Regex(r'"([^"]|""?(?!"))*', flags=re.MULTILINE) + '"""').set_name( + "multiline double quoted string" + ) + | (Regex(r"'([^']|''?(?!'))*", flags=re.MULTILINE) + "'''").set_name( + "multiline single quoted string" + ) + | (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name( + "double quoted string" + ) + | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name( + "single quoted string" + ) +).set_name("Python quoted string") + unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal") diff --git a/pyparsing/exceptions.py b/pyparsing/exceptions.py index b0694c3..869141c 100644 --- a/pyparsing/exceptions.py +++ b/pyparsing/exceptions.py @@ -80,7 +80,9 @@ class ParseBaseException(Exception): f_self = frm.f_locals.get("self", None) if isinstance(f_self, ParserElement): - if not frm.f_code.co_name.startswith(("parseImpl", "_parseNoCache")): + if not frm.f_code.co_name.startswith( + ("parseImpl", "_parseNoCache") + ): continue if id(f_self) in seen: continue diff --git a/tests/test_diagram.py b/tests/test_diagram.py index 821e269..63a0a3f 100644 --- a/tests/test_diagram.py +++ b/tests/test_diagram.py @@ -70,11 +70,11 @@ class TestRailroadDiagrams(unittest.TestCase): def test_sql(self): railroad = self.generate_railroad(simpleSQL, "simpleSQL") - assert len(railroad) == 18 + assert len(railroad) == 20 railroad = self.generate_railroad( simpleSQL, "simpleSQL", show_results_names=True ) - assert len(railroad) == 18 + assert len(railroad) == 20 def test_calendars(self): railroad = self.generate_railroad(calendars, "calendars") diff --git a/tests/test_unit.py b/tests/test_unit.py index b7a23d0..bcea5cd 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -1082,117 +1082,131 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): """ print(testData) - sglStrings = [ - (t[0], b, e) for (t, b, e) in pp.sglQuotedString.scanString(testData) - ] - print(sglStrings) - self.assertTrue( - len(sglStrings) == 1 - and (sglStrings[0][1] == 17 and sglStrings[0][2] == 47), - "single quoted string failure", - ) + with self.subTest(): + sglStrings = [ + (t[0], b, e) for (t, b, e) in pp.sglQuotedString.scanString(testData) + ] + print(sglStrings) + self.assertTrue( + len(sglStrings) == 1 + and (sglStrings[0][1] == 17 and sglStrings[0][2] == 47), + "single quoted string failure", + ) - dblStrings = [ - (t[0], b, e) for (t, b, e) in pp.dblQuotedString.scanString(testData) - ] - print(dblStrings) - self.assertTrue( - len(dblStrings) == 1 - and (dblStrings[0][1] == 154 and dblStrings[0][2] == 184), - "double quoted string failure", - ) + with self.subTest(): + dblStrings = [ + (t[0], b, e) for (t, b, e) in pp.dblQuotedString.scanString(testData) + ] + print(dblStrings) + self.assertTrue( + len(dblStrings) == 1 + and (dblStrings[0][1] == 154 and dblStrings[0][2] == 184), + "double quoted string failure", + ) - allStrings = [ - (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(testData) - ] - print(allStrings) - self.assertTrue( - len(allStrings) == 2 - and (allStrings[0][1] == 17 and allStrings[0][2] == 47) - and (allStrings[1][1] == 154 and allStrings[1][2] == 184), - "quoted string failure", - ) + with self.subTest(): + allStrings = [ + (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(testData) + ] + print(allStrings) + self.assertTrue( + len(allStrings) == 2 + and (allStrings[0][1] == 17 and allStrings[0][2] == 47) + and (allStrings[1][1] == 154 and allStrings[1][2] == 184), + "quoted string failure", + ) escapedQuoteTest = r""" 'This string has an escaped (\') quote character' "This string has an escaped (\") quote character" """ - sglStrings = [ - (t[0], b, e) - for (t, b, e) in pp.sglQuotedString.scanString(escapedQuoteTest) - ] - print(sglStrings) - self.assertTrue( - len(sglStrings) == 1 - and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66), - "single quoted string escaped quote failure (%s)" % str(sglStrings[0]), - ) + with self.subTest(): + sglStrings = [ + (t[0], b, e) + for (t, b, e) in pp.sglQuotedString.scanString(escapedQuoteTest) + ] + print(sglStrings) + self.assertTrue( + len(sglStrings) == 1 + and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66), + "single quoted string escaped quote failure (%s)" % str(sglStrings[0]), + ) - dblStrings = [ - (t[0], b, e) - for (t, b, e) in pp.dblQuotedString.scanString(escapedQuoteTest) - ] - print(dblStrings) - self.assertTrue( - len(dblStrings) == 1 - and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132), - "double quoted string escaped quote failure (%s)" % str(dblStrings[0]), - ) + with self.subTest(): + dblStrings = [ + (t[0], b, e) + for (t, b, e) in pp.dblQuotedString.scanString(escapedQuoteTest) + ] + print(dblStrings) + self.assertTrue( + len(dblStrings) == 1 + and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132), + "double quoted string escaped quote failure (%s)" % str(dblStrings[0]), + ) - allStrings = [ - (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(escapedQuoteTest) - ] - print(allStrings) - self.assertTrue( - len(allStrings) == 2 - and ( - allStrings[0][1] == 17 - and allStrings[0][2] == 66 - and allStrings[1][1] == 83 - and allStrings[1][2] == 132 - ), - "quoted string escaped quote failure (%s)" - % ([str(s[0]) for s in allStrings]), - ) + with self.subTest(): + allStrings = [ + (t[0], b, e) + for (t, b, e) in pp.quotedString.scanString(escapedQuoteTest) + ] + print(allStrings) + self.assertTrue( + len(allStrings) == 2 + and ( + allStrings[0][1] == 17 + and allStrings[0][2] == 66 + and allStrings[1][1] == 83 + and allStrings[1][2] == 132 + ), + "quoted string escaped quote failure (%s)" + % ([str(s[0]) for s in allStrings]), + ) dblQuoteTest = r""" 'This string has an doubled ('') quote character' "This string has an doubled ("") quote character" """ - sglStrings = [ - (t[0], b, e) for (t, b, e) in pp.sglQuotedString.scanString(dblQuoteTest) - ] - print(sglStrings) - self.assertTrue( - len(sglStrings) == 1 - and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66), - "single quoted string escaped quote failure (%s)" % str(sglStrings[0]), - ) - dblStrings = [ - (t[0], b, e) for (t, b, e) in pp.dblQuotedString.scanString(dblQuoteTest) - ] - print(dblStrings) - self.assertTrue( - len(dblStrings) == 1 - and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132), - "double quoted string escaped quote failure (%s)" % str(dblStrings[0]), - ) - allStrings = [ - (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(dblQuoteTest) - ] - print(allStrings) - self.assertTrue( - len(allStrings) == 2 - and ( - allStrings[0][1] == 17 - and allStrings[0][2] == 66 - and allStrings[1][1] == 83 - and allStrings[1][2] == 132 - ), - "quoted string escaped quote failure (%s)" - % ([str(s[0]) for s in allStrings]), - ) + with self.subTest(): + sglStrings = [ + (t[0], b, e) + for (t, b, e) in pp.sglQuotedString.scanString(dblQuoteTest) + ] + print(sglStrings) + self.assertTrue( + len(sglStrings) == 1 + and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66), + "single quoted string escaped quote failure (%s)" % str(sglStrings[0]), + ) + + with self.subTest(): + dblStrings = [ + (t[0], b, e) + for (t, b, e) in pp.dblQuotedString.scanString(dblQuoteTest) + ] + print(dblStrings) + self.assertTrue( + len(dblStrings) == 1 + and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132), + "double quoted string escaped quote failure (%s)" % str(dblStrings[0]), + ) + + with self.subTest(): + allStrings = [ + (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(dblQuoteTest) + ] + print(allStrings) + self.assertTrue( + len(allStrings) == 2 + and ( + allStrings[0][1] == 17 + and allStrings[0][2] == 66 + and allStrings[1][1] == 83 + and allStrings[1][2] == 132 + ), + "quoted string escaped quote failure (%s)" + % ([str(s[0]) for s in allStrings]), + ) print( "testing catastrophic RE backtracking in implementation of dblQuotedString" @@ -1205,17 +1219,37 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): (pp.QuotedString('"'), '"' + "\\xff" * 500), (pp.QuotedString("'"), "'" + "\\xff" * 500), ]: - expr.parseString(test_string + test_string[0], parseAll=True) - try: - expr.parseString(test_string, parseAll=True) - except Exception: - continue + with self.subTest(expr=expr, test_string=test_string): + expr.parseString(test_string + test_string[0], parseAll=True) + try: + expr.parseString(test_string, parseAll=True) + except Exception: + continue # test invalid endQuoteChar - with self.assertRaises( - ValueError, msg="issue raising error for invalid endQuoteChar" - ): - expr = pp.QuotedString('"', endQuoteChar=" ") + with self.subTest(): + with self.assertRaises( + ValueError, msg="issue raising error for invalid endQuoteChar" + ): + expr = pp.QuotedString('"', endQuoteChar=" ") + + with self.subTest(): + source = """ + ''' + multiline quote with comment # this is a comment + ''' + \"\"\" + multiline quote with comment # this is a comment + \"\"\" + "single line quote with comment # this is a comment" + 'single line quote with comment # this is a comment' + """ + stripped = ( + pp.python_style_comment.ignore(pp.python_quoted_string) + .suppress() + .transform_string(source) + ) + self.assertEqual(source, stripped) def testCaselessOneOf(self): caseless1 = pp.oneOf("d a b c aA B A C", caseless=True) @@ -2033,6 +2067,17 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): self.assertParseResultsEquals(testVal, expected_list=expected) + def testCombineSetName(self): + ab = pp.Combine( + pp.Literal("a").set_name("AAA") | pp.Literal("b").set_name("BBB") + ).set_name("AB") + self.assertEqual("AB", ab.name) + self.assertEqual("AB", str(ab)) + try: + ab.parse_string("C") + except ParseException as pe: + self.assertTrue(str(pe).startswith("Expected AB")) + def testHTMLEntities(self): html_source = dedent( """\ |