summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ptmcg <ptmcg@austin.rr.com> 2022-07-09 15:37:04 -0500
committer ptmcg <ptmcg@austin.rr.com> 2022-07-09 15:37:04 -0500
commit 97b30229dcdebddc341df114d9d438431179f4bb (patch)
tree 4a991abe022fa078d4d7a2aff99209519bccaee5
parent fe195a849c62e58a8de3274830c3ebc98f892750 (diff)
parent 9751d0c686583fa4ade133fa845297f0f342c718 (diff)
download pyparsing-git-97b30229dcdebddc341df114d9d438431179f4bb.tar.gz
Merge branch 'ptm_address_booleansearchparser_issue'
# Conflicts:
#   examples/booleansearchparser.py
#   pyparsing/__init__.py
#   tests/test_examples.py
#   tests/test_unit.py
-rw-r--r-- CHANGES 6
-rw-r--r-- examples/booleansearchparser.py 151
-rw-r--r-- pyparsing/__init__.py 2
-rw-r--r-- tests/test_unit.py 87
4 files changed, 186 insertions, 60 deletions
diff --git a/CHANGES b/CHANGES
index 6e651db..9a4baa8 100644
--- a/CHANGES
+++ b/CHANGES
@@ -23,8 +23,8 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
"{" + (Literal("A") | Literal("a") | "") + "}"
Some related changes implemented as part of this work:
- - Literal("") now internally generates an Empty() (and no longer raises an exception)
- - Empty is now a subclass of Literal
+ - `Literal("")` now internally generates an `Empty()` (and no longer raises an exception)
+ - `Empty` is now a subclass of `Literal`
Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly.
@@ -70,7 +70,7 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
- Fixed bug in srange, when parsing escaped '/' and '\' inside a
range set.
-- Fixed exception messages for some ParserElements with custom names,
+- Fixed exception messages for some `ParserElements` with custom names,
which instead showed their contained expression names.
- Multiple added and corrected type annotations. With much help from
diff --git a/examples/booleansearchparser.py b/examples/booleansearchparser.py
index f612379..cefba01 100644
--- a/examples/booleansearchparser.py
+++ b/examples/booleansearchparser.py
@@ -90,11 +90,36 @@ from pyparsing import (
Suppress,
OneOrMore,
one_of,
- pyparsing_unicode as ppu,
)
import re
+# Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt
+# (includes characters not found in the BasicMultilingualPlane)
+alphabet_ranges = [
+ # CYRILLIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block)
+ [int("0400", 16), int("04FF", 16)],
+ # ARABIC: https://en.wikipedia.org/wiki/Arabic_(Unicode_block) (Arabic (0600–06FF)+ Syriac (0700–074F)+ Arabic Supplement (0750–077F))
+ [int("0600", 16), int("07FF", 16)],
+ # THAI: https://en.wikipedia.org/wiki/Thai_(Unicode_block)
+ [int("0E00", 16), int("0E7F", 16)],
+ # JAPANESE : https://en.wikipedia.org/wiki/Japanese_writing_system (Hiragana (3040–309F) + Katakana (30A0–30FF))
+ [int("3040", 16), int("30FF", 16)],
+ # Enclosed CJK Letters and Months
+ [int("3200", 16), int("32FF", 16)],
+ # CHINESE: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+ [int("4E00", 16), int("9FFF", 16)],
+ # KOREAN : https://en.wikipedia.org/wiki/Hangul
+ [int("1100", 16), int("11FF", 16)],
+ [int("3130", 16), int("318F", 16)],
+ [int("A960", 16), int("A97F", 16)],
+ [int("AC00", 16), int("D7AF", 16)],
+ [int("D7B0", 16), int("D7FF", 16)],
+ # Halfwidth and Fullwidth Forms
+ [int("FF00", 16), int("FFEF", 16)],
+]
+
+
class BooleanSearchParser:
def __init__(self, only_parse=False):
self._methods = {
@@ -128,8 +153,11 @@ class BooleanSearchParser:
"""
operatorOr = Forward()
+ alphabet = alphanums
+
# support for non-western alphabets
- alphabet = ppu.BasicMultilingualPlane.alphanums
+ for lo, hi in alphabet_ranges:
+ alphabet += "".join(chr(c) for c in range(lo, hi + 1) if not chr(c).isspace())
operatorWord = Group(Word(alphabet + "*")).set_results_name("word*")
@@ -297,58 +325,66 @@ class ParserTest(BooleanSearchParser):
def Test(self):
# fmt: off
exprs = {
- 0: "help",
- 1: "help or hulp",
- 2: "help and hulp",
- 3: "help hulp",
- 4: "help and hulp or hilp",
- 5: "help or hulp and hilp",
- 6: "help or hulp or hilp or halp",
- 7: "(help or hulp) and (hilp or halp)",
- 8: "help and (hilp or halp)",
- 9: "(help and (hilp or halp)) or hulp",
- 10: "not help",
- 11: "not hulp and halp",
- 12: "not (help and halp)",
- 13: '"help me please"',
- 14: '"help me please" or hulp',
- 15: '"help me please" or (hulp and halp)',
- 16: "help*",
- 17: "help or hulp*",
- 18: "help* and hulp",
- 19: "help and hulp* or hilp",
- 20: "help* or hulp or hilp or halp",
- 21: "(help or hulp*) and (hilp* or halp)",
- 22: "help* and (hilp* or halp*)",
- 23: "(help and (hilp* or halp)) or hulp*",
- 24: "not help* and halp",
- 25: "not (help* and helpe*)",
- 26: '"help* me please"',
- 27: '"help* me* please" or hulp*',
- 28: '"help me please*" or (hulp and halp)',
- 29: '"help me please" not (hulp and halp)',
- 30: '"help me please" hulp',
- 31: "help and hilp and not holp",
- 32: "help hilp not holp",
- 33: "help hilp and not holp",
- 34: "*lp and halp",
- 35: "*신은 and 어떠세요",
- 36: "not 당신은",
- 37: "당신 or 당",
- 38: "亀",
+ "0": "help",
+ "1": "help or hulp",
+ "2": "help and hulp",
+ "3": "help hulp",
+ "4": "help and hulp or hilp",
+ "5": "help or hulp and hilp",
+ "6": "help or hulp or hilp or halp",
+ "7": "(help or hulp) and (hilp or halp)",
+ "8": "help and (hilp or halp)",
+ "9": "(help and (hilp or halp)) or hulp",
+ "10": "not help",
+ "11": "not hulp and halp",
+ "12": "not (help and halp)",
+ "13": '"help me please"',
+ "14": '"help me please" or hulp',
+ "15": '"help me please" or (hulp and halp)',
+ "16": "help*",
+ "17": "help or hulp*",
+ "18": "help* and hulp",
+ "19": "help and hulp* or hilp",
+ "20": "help* or hulp or hilp or halp",
+ "21": "(help or hulp*) and (hilp* or halp)",
+ "22": "help* and (hilp* or halp*)",
+ "23": "(help and (hilp* or halp)) or hulp*",
+ "24": "not help* and halp",
+ "25": "not (help* and helpe*)",
+ "26": '"help* me please"',
+ "27": '"help* me* please" or hulp*',
+ "28": '"help me please*" or (hulp and halp)',
+ "29": '"help me please" not (hulp and halp)',
+ "30": '"help me please" hulp',
+ "31": "help and hilp and not holp",
+ "32": "help hilp not holp",
+ "33": "help hilp and not holp",
+ "34": "*lp and halp",
+ "35": "*신은 and 어떠세요",
}
texts_matcheswith = {
- "halp thinks he needs help": [0, 1, 5, 6, 7, 8, 9, 11, 16, 17, 20, 21, 22, 23, 25, 34, 36],
- "he needs halp": [6, 10, 11, 12, 20, 24, 25, 34, 36],
- "help": [0, 1, 5, 6, 12, 16, 17, 20, 25, 36],
- "help hilp": [0, 1, 4, 5, 6, 7, 8, 9, 12, 16, 17, 19, 20, 21, 22, 23, 25, 31, 32, 33, 36],
- "help me please hulp": [0, 1, 2, 3, 4, 5, 6, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 25, 27, 29, 30, 36],
- "helper": [10, 12, 16, 20, 36],
- "hulp hilp": [1, 4, 5, 6, 7, 9, 10, 12, 14, 17, 19, 20, 21, 23, 25, 27, 36],
- "nothing": [10, 12, 25, 36],
- "안녕하세요, 당신은 어떠세요?": [10, 12, 25, 35],
- "亀": [10, 12, 25, 36, 38],
+ "halp thinks he needs help": [
+ "25", "22", "20", "21", "11", "17", "16", "23", "34", "1",
+ "0", "5", "7", "6", "9", "8",
+ ],
+ "he needs halp": ["24", "25", "20", "11", "10", "12", "34", "6"],
+ "help": ["25", "20", "12", "17", "16", "1", "0", "5", "6"],
+ "help hilp": [
+ "25", "22", "20", "32", "21", "12", "17", "16", "19", "31",
+ "23", "1", "0", "5", "4", "7", "6", "9", "8", "33",
+ ],
+ "help me please hulp": [
+ "30", "25", "27", "20", "13", "12", "15", "14", "17", "16",
+ "19", "18", "23", "29", "1", "0", "3", "2", "5", "4", "6", "9",
+ ],
+ "helper": ["20", "10", "12", "16"],
+ "hulp hilp": [
+ "25", "27", "20", "21", "10", "12", "14", "17", "19", "23",
+ "1", "5", "4", "7", "6", "9",
+ ],
+ "nothing": ["25", "10", "12"],
+ "안녕하세요, 당신은 어떠세요?": ["10", "12", "25", "35"],
}
# fmt: on
@@ -360,7 +396,9 @@ class ParserTest(BooleanSearchParser):
_matches.append(_id)
test_passed = sorted(matches) == sorted(_matches)
- if not test_passed:
+ if test_passed:
+ print("Passed", repr(text))
+ else:
print("Failed", repr(text), "expected", matches, "matched", _matches)
all_ok = all_ok and test_passed
@@ -391,7 +429,9 @@ class ParserTest(BooleanSearchParser):
_matches.append(_id)
test_passed = sorted(matches) == sorted(_matches)
- if not test_passed:
+ if test_passed:
+ print("Passed", repr(text))
+ else:
print("Failed", repr(text), "expected", matches, "matched", _matches)
all_ok = all_ok and test_passed
@@ -402,11 +442,10 @@ class ParserTest(BooleanSearchParser):
def main():
if ParserTest().Test():
print("All tests OK")
- exit(0)
else:
print("One or more tests FAILED")
- exit(1)
+ raise Exception("One or more tests FAILED")
if __name__ == "__main__":
- main() \ No newline at end of file
+ main()
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index f47f7ff..dd66063 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -121,7 +121,7 @@ class version_info(NamedTuple):
__version_info__ = version_info(3, 0, 10, "final", 0)
-__version_time__ = "09 Jul 2022 13:52 UTC"
+__version_time__ = "09 Jul 2022 20:01 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 0687f1c..d955c48 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -8313,6 +8313,93 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
expr, source, [s.strip() for s in source.split(",")]
)
+ def testDelimitedListParseActions1(self):
+ # from issue #408
+ keyword = pp.Keyword('foobar')
+ untyped_identifier = ~keyword + pp.Word(pp.alphas)
+ dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+ lvalue = pp.Opt(dotted_vars)
+
+ # uncomment this line to see the problem
+ stmt = pp.delimited_list(pp.Opt(dotted_vars))
+ # stmt = delimited_list(dotted_vars)
+ # stmt = pp.Opt(dotted_vars)
+
+ def parse_identifier(toks):
+ print('YAY!', toks)
+
+ untyped_identifier.set_parse_action(parse_identifier)
+
+ save_stdout = StringIO()
+ with contextlib.redirect_stdout(save_stdout):
+ dotted_vars.parse_string('B.C')
+
+ self.assertEqual(
+ dedent("""\
+ YAY! ['B']
+ YAY! ['C']
+ """),
+ save_stdout.getvalue()
+ )
+
+ def testDelimitedListParseActions2(self):
+ # from issue #408
+ keyword = pp.Keyword('foobar')
+ untyped_identifier = ~keyword + pp.Word(pp.alphas)
+ dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+ lvalue = pp.Opt(dotted_vars)
+
+ # uncomment this line to see the problem
+ # stmt = delimited_list(Opt(dotted_vars))
+ stmt = pp.delimited_list(dotted_vars)
+ # stmt = pp.Opt(dotted_vars)
+
+ def parse_identifier(toks):
+ print('YAY!', toks)
+
+ untyped_identifier.set_parse_action(parse_identifier)
+
+ save_stdout = StringIO()
+ with contextlib.redirect_stdout(save_stdout):
+ dotted_vars.parse_string('B.C')
+
+ self.assertEqual(
+ dedent("""\
+ YAY! ['B']
+ YAY! ['C']
+ """),
+ save_stdout.getvalue()
+ )
+
+ def testDelimitedListParseActions3(self):
+ # from issue #408
+ keyword = pp.Keyword('foobar')
+ untyped_identifier = ~keyword + pp.Word(pp.alphas)
+ dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+ lvalue = pp.Opt(dotted_vars)
+
+ # uncomment this line to see the problem
+ # stmt = delimited_list(Opt(dotted_vars))
+ # stmt = delimited_list(dotted_vars)
+ stmt = pp.Opt(dotted_vars)
+
+ def parse_identifier(toks):
+ print('YAY!', toks)
+
+ untyped_identifier.set_parse_action(parse_identifier)
+
+ save_stdout = StringIO()
+ with contextlib.redirect_stdout(save_stdout):
+ dotted_vars.parse_string('B.C')
+
+ self.assertEqual(
+ dedent("""\
+ YAY! ['B']
+ YAY! ['C']
+ """),
+ save_stdout.getvalue()
+ )
+
def testEnableDebugOnNamedExpressions(self):
"""
- enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent