summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ptmcg <ptmcg@austin.rr.com> 2022-06-29 02:12:10 -0500
committer ptmcg <ptmcg@austin.rr.com> 2022-06-29 02:12:10 -0500
commit 4cd691f3c3e342f842629a1328a9d12f10af4755 (patch)
tree 9db9583a93fa3c70596bf59d9d5d7444f805a350
parent cb6858cced83bde0de8d497e3a0f2e39ce9edf59 (diff)
download pyparsing-git-4cd691f3c3e342f842629a1328a9d12f10af4755.tar.gz
Added python_quoted_string; fixed exception messages for ParseElementEnhance subclasses
-rw-r--r-- CHANGES 9
-rw-r--r-- docs/HowToUsePyparsing.rst 4
-rw-r--r-- pyparsing/__init__.py 2
-rw-r--r-- pyparsing/core.py 29
-rw-r--r-- pyparsing/exceptions.py 4
-rw-r--r-- tests/test_diagram.py 4
-rw-r--r-- tests/test_unit.py 255
7 files changed, 193 insertions, 114 deletions
diff --git a/CHANGES b/CHANGES
index d78bfe9..105a644 100644
--- a/CHANGES
+++ b/CHANGES
@@ -28,6 +28,10 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly.
+- Added new builtin `python_quoted_string`, which will match any form
+ of single-line or multiline quoted strings defined in Python. (Inspired
+ by discussion with Andreas Schörgenhumer in Issue #421.)
+
- Fixed bug in `Word` when `max=2`. Also added performance enhancement
when specifying `exact` argument. Reported in issue #409 by
panda-34, nice catch!
@@ -35,7 +39,7 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
- `Word` arguments are now validated if `min` and `max` are both
given, that `min` <= `max`; raises `ValueError` if values are invalid.
-- Extended `expr[]` notation for repetition of expr to accept a
+- Extended `expr[]` notation for repetition of `expr` to accept a
slice, where the slice's stop value indicates a `stop_on`
expression:
@@ -62,6 +66,9 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
- Fixed bug in srange, when parsing escaped '/' and '\' inside a
range set.
+- Fixed exception messages for some ParserElements with custom names,
+ which instead showed their contained expression names.
+
- Multiple added and corrected type annotations. With much help from
Stephen Rosen, thanks!
diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst
index 454dc6d..8301857 100644
--- a/docs/HowToUsePyparsing.rst
+++ b/docs/HowToUsePyparsing.rst
@@ -6,7 +6,7 @@ Using the pyparsing module
:address: ptmcg.pm+pyparsing@gmail.com
:revision: 3.0.10
-:date: May, 2022
+:date: July, 2022
:copyright: Copyright |copy| 2003-2022 Paul McGuire.
@@ -1308,6 +1308,8 @@ Common string and token constants
- ``quoted_string`` - ``sgl_quoted_string | dbl_quoted_string``
+- ``python_quoted_string`` - ``quoted_string | multiline quoted string``
+
- ``c_style_comment`` - a comment block delimited by ``'/*'`` and ``'*/'`` sequences; can span
multiple lines, but does not support nesting of comments
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index 368c5f7..7970791 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -121,7 +121,7 @@ class version_info(NamedTuple):
__version_info__ = version_info(3, 0, 10, "final", 0)
-__version_time__ = "24 Jun 2022 16:29 UTC"
+__version_time__ = "29 Jun 2022 06:57 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/pyparsing/core.py b/pyparsing/core.py
index 11f7368..3a332e9 100644
--- a/pyparsing/core.py
+++ b/pyparsing/core.py
@@ -4446,7 +4446,11 @@ class ParseElementEnhance(ParserElement):
def parseImpl(self, instring, loc, doActions=True):
if self.expr is not None:
- return self.expr._parse(instring, loc, doActions, callPreParse=False)
+ try:
+ return self.expr._parse(instring, loc, doActions, callPreParse=False)
+ except ParseBaseException as pbe:
+ pbe.msg = self.errmsg
+ raise
else:
raise ParseException(instring, loc, "No expression defined", self)
@@ -5870,10 +5874,29 @@ sgl_quoted_string = Combine(
).set_name("string enclosed in single quotes")
quoted_string = Combine(
- Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
- | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
+ (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
+ "double quoted string"
+ )
+ | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
+ "single quoted string"
+ )
).set_name("quoted string using single or double quotes")
+python_quoted_string = Combine(
+ (Regex(r'"([^"]|""?(?!"))*', flags=re.MULTILINE) + '"""').set_name(
+ "multiline double quoted string"
+ )
+ | (Regex(r"'([^']|''?(?!'))*", flags=re.MULTILINE) + "'''").set_name(
+ "multiline single quoted string"
+ )
+ | (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name(
+ "double quoted string"
+ )
+ | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name(
+ "single quoted string"
+ )
+).set_name("Python quoted string")
+
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")
diff --git a/pyparsing/exceptions.py b/pyparsing/exceptions.py
index b0694c3..869141c 100644
--- a/pyparsing/exceptions.py
+++ b/pyparsing/exceptions.py
@@ -80,7 +80,9 @@ class ParseBaseException(Exception):
f_self = frm.f_locals.get("self", None)
if isinstance(f_self, ParserElement):
- if not frm.f_code.co_name.startswith(("parseImpl", "_parseNoCache")):
+ if not frm.f_code.co_name.startswith(
+ ("parseImpl", "_parseNoCache")
+ ):
continue
if id(f_self) in seen:
continue
diff --git a/tests/test_diagram.py b/tests/test_diagram.py
index 821e269..63a0a3f 100644
--- a/tests/test_diagram.py
+++ b/tests/test_diagram.py
@@ -70,11 +70,11 @@ class TestRailroadDiagrams(unittest.TestCase):
def test_sql(self):
railroad = self.generate_railroad(simpleSQL, "simpleSQL")
- assert len(railroad) == 18
+ assert len(railroad) == 20
railroad = self.generate_railroad(
simpleSQL, "simpleSQL", show_results_names=True
)
- assert len(railroad) == 18
+ assert len(railroad) == 20
def test_calendars(self):
railroad = self.generate_railroad(calendars, "calendars")
diff --git a/tests/test_unit.py b/tests/test_unit.py
index b7a23d0..bcea5cd 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -1082,117 +1082,131 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
"""
print(testData)
- sglStrings = [
- (t[0], b, e) for (t, b, e) in pp.sglQuotedString.scanString(testData)
- ]
- print(sglStrings)
- self.assertTrue(
- len(sglStrings) == 1
- and (sglStrings[0][1] == 17 and sglStrings[0][2] == 47),
- "single quoted string failure",
- )
+ with self.subTest():
+ sglStrings = [
+ (t[0], b, e) for (t, b, e) in pp.sglQuotedString.scanString(testData)
+ ]
+ print(sglStrings)
+ self.assertTrue(
+ len(sglStrings) == 1
+ and (sglStrings[0][1] == 17 and sglStrings[0][2] == 47),
+ "single quoted string failure",
+ )
- dblStrings = [
- (t[0], b, e) for (t, b, e) in pp.dblQuotedString.scanString(testData)
- ]
- print(dblStrings)
- self.assertTrue(
- len(dblStrings) == 1
- and (dblStrings[0][1] == 154 and dblStrings[0][2] == 184),
- "double quoted string failure",
- )
+ with self.subTest():
+ dblStrings = [
+ (t[0], b, e) for (t, b, e) in pp.dblQuotedString.scanString(testData)
+ ]
+ print(dblStrings)
+ self.assertTrue(
+ len(dblStrings) == 1
+ and (dblStrings[0][1] == 154 and dblStrings[0][2] == 184),
+ "double quoted string failure",
+ )
- allStrings = [
- (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(testData)
- ]
- print(allStrings)
- self.assertTrue(
- len(allStrings) == 2
- and (allStrings[0][1] == 17 and allStrings[0][2] == 47)
- and (allStrings[1][1] == 154 and allStrings[1][2] == 184),
- "quoted string failure",
- )
+ with self.subTest():
+ allStrings = [
+ (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(testData)
+ ]
+ print(allStrings)
+ self.assertTrue(
+ len(allStrings) == 2
+ and (allStrings[0][1] == 17 and allStrings[0][2] == 47)
+ and (allStrings[1][1] == 154 and allStrings[1][2] == 184),
+ "quoted string failure",
+ )
escapedQuoteTest = r"""
'This string has an escaped (\') quote character'
"This string has an escaped (\") quote character"
"""
- sglStrings = [
- (t[0], b, e)
- for (t, b, e) in pp.sglQuotedString.scanString(escapedQuoteTest)
- ]
- print(sglStrings)
- self.assertTrue(
- len(sglStrings) == 1
- and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66),
- "single quoted string escaped quote failure (%s)" % str(sglStrings[0]),
- )
+ with self.subTest():
+ sglStrings = [
+ (t[0], b, e)
+ for (t, b, e) in pp.sglQuotedString.scanString(escapedQuoteTest)
+ ]
+ print(sglStrings)
+ self.assertTrue(
+ len(sglStrings) == 1
+ and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66),
+ "single quoted string escaped quote failure (%s)" % str(sglStrings[0]),
+ )
- dblStrings = [
- (t[0], b, e)
- for (t, b, e) in pp.dblQuotedString.scanString(escapedQuoteTest)
- ]
- print(dblStrings)
- self.assertTrue(
- len(dblStrings) == 1
- and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132),
- "double quoted string escaped quote failure (%s)" % str(dblStrings[0]),
- )
+ with self.subTest():
+ dblStrings = [
+ (t[0], b, e)
+ for (t, b, e) in pp.dblQuotedString.scanString(escapedQuoteTest)
+ ]
+ print(dblStrings)
+ self.assertTrue(
+ len(dblStrings) == 1
+ and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132),
+ "double quoted string escaped quote failure (%s)" % str(dblStrings[0]),
+ )
- allStrings = [
- (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(escapedQuoteTest)
- ]
- print(allStrings)
- self.assertTrue(
- len(allStrings) == 2
- and (
- allStrings[0][1] == 17
- and allStrings[0][2] == 66
- and allStrings[1][1] == 83
- and allStrings[1][2] == 132
- ),
- "quoted string escaped quote failure (%s)"
- % ([str(s[0]) for s in allStrings]),
- )
+ with self.subTest():
+ allStrings = [
+ (t[0], b, e)
+ for (t, b, e) in pp.quotedString.scanString(escapedQuoteTest)
+ ]
+ print(allStrings)
+ self.assertTrue(
+ len(allStrings) == 2
+ and (
+ allStrings[0][1] == 17
+ and allStrings[0][2] == 66
+ and allStrings[1][1] == 83
+ and allStrings[1][2] == 132
+ ),
+ "quoted string escaped quote failure (%s)"
+ % ([str(s[0]) for s in allStrings]),
+ )
dblQuoteTest = r"""
'This string has an doubled ('') quote character'
"This string has an doubled ("") quote character"
"""
- sglStrings = [
- (t[0], b, e) for (t, b, e) in pp.sglQuotedString.scanString(dblQuoteTest)
- ]
- print(sglStrings)
- self.assertTrue(
- len(sglStrings) == 1
- and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66),
- "single quoted string escaped quote failure (%s)" % str(sglStrings[0]),
- )
- dblStrings = [
- (t[0], b, e) for (t, b, e) in pp.dblQuotedString.scanString(dblQuoteTest)
- ]
- print(dblStrings)
- self.assertTrue(
- len(dblStrings) == 1
- and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132),
- "double quoted string escaped quote failure (%s)" % str(dblStrings[0]),
- )
- allStrings = [
- (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(dblQuoteTest)
- ]
- print(allStrings)
- self.assertTrue(
- len(allStrings) == 2
- and (
- allStrings[0][1] == 17
- and allStrings[0][2] == 66
- and allStrings[1][1] == 83
- and allStrings[1][2] == 132
- ),
- "quoted string escaped quote failure (%s)"
- % ([str(s[0]) for s in allStrings]),
- )
+ with self.subTest():
+ sglStrings = [
+ (t[0], b, e)
+ for (t, b, e) in pp.sglQuotedString.scanString(dblQuoteTest)
+ ]
+ print(sglStrings)
+ self.assertTrue(
+ len(sglStrings) == 1
+ and (sglStrings[0][1] == 17 and sglStrings[0][2] == 66),
+ "single quoted string escaped quote failure (%s)" % str(sglStrings[0]),
+ )
+
+ with self.subTest():
+ dblStrings = [
+ (t[0], b, e)
+ for (t, b, e) in pp.dblQuotedString.scanString(dblQuoteTest)
+ ]
+ print(dblStrings)
+ self.assertTrue(
+ len(dblStrings) == 1
+ and (dblStrings[0][1] == 83 and dblStrings[0][2] == 132),
+ "double quoted string escaped quote failure (%s)" % str(dblStrings[0]),
+ )
+
+ with self.subTest():
+ allStrings = [
+ (t[0], b, e) for (t, b, e) in pp.quotedString.scanString(dblQuoteTest)
+ ]
+ print(allStrings)
+ self.assertTrue(
+ len(allStrings) == 2
+ and (
+ allStrings[0][1] == 17
+ and allStrings[0][2] == 66
+ and allStrings[1][1] == 83
+ and allStrings[1][2] == 132
+ ),
+ "quoted string escaped quote failure (%s)"
+ % ([str(s[0]) for s in allStrings]),
+ )
print(
"testing catastrophic RE backtracking in implementation of dblQuotedString"
@@ -1205,17 +1219,37 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
(pp.QuotedString('"'), '"' + "\\xff" * 500),
(pp.QuotedString("'"), "'" + "\\xff" * 500),
]:
- expr.parseString(test_string + test_string[0], parseAll=True)
- try:
- expr.parseString(test_string, parseAll=True)
- except Exception:
- continue
+ with self.subTest(expr=expr, test_string=test_string):
+ expr.parseString(test_string + test_string[0], parseAll=True)
+ try:
+ expr.parseString(test_string, parseAll=True)
+ except Exception:
+ continue
# test invalid endQuoteChar
- with self.assertRaises(
- ValueError, msg="issue raising error for invalid endQuoteChar"
- ):
- expr = pp.QuotedString('"', endQuoteChar=" ")
+ with self.subTest():
+ with self.assertRaises(
+ ValueError, msg="issue raising error for invalid endQuoteChar"
+ ):
+ expr = pp.QuotedString('"', endQuoteChar=" ")
+
+ with self.subTest():
+ source = """
+ '''
+ multiline quote with comment # this is a comment
+ '''
+ \"\"\"
+ multiline quote with comment # this is a comment
+ \"\"\"
+ "single line quote with comment # this is a comment"
+ 'single line quote with comment # this is a comment'
+ """
+ stripped = (
+ pp.python_style_comment.ignore(pp.python_quoted_string)
+ .suppress()
+ .transform_string(source)
+ )
+ self.assertEqual(source, stripped)
def testCaselessOneOf(self):
caseless1 = pp.oneOf("d a b c aA B A C", caseless=True)
@@ -2033,6 +2067,17 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
self.assertParseResultsEquals(testVal, expected_list=expected)
+ def testCombineSetName(self):
+ ab = pp.Combine(
+ pp.Literal("a").set_name("AAA") | pp.Literal("b").set_name("BBB")
+ ).set_name("AB")
+ self.assertEqual("AB", ab.name)
+ self.assertEqual("AB", str(ab))
+ try:
+ ab.parse_string("C")
+ except ParseException as pe:
+ self.assertTrue(str(pe).startswith("Expected AB"))
+
def testHTMLEntities(self):
html_source = dedent(
"""\