diff options
author | ptmcg <ptmcg@austin.rr.com> | 2022-07-09 15:37:04 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2022-07-09 15:37:04 -0500 |
commit | 97b30229dcdebddc341df114d9d438431179f4bb (patch) | |
tree | 4a991abe022fa078d4d7a2aff99209519bccaee5 | |
parent | fe195a849c62e58a8de3274830c3ebc98f892750 (diff) | |
parent | 9751d0c686583fa4ade133fa845297f0f342c718 (diff) | |
download | pyparsing-git-97b30229dcdebddc341df114d9d438431179f4bb.tar.gz |
Merge branch 'ptm_address_booleansearchparser_issue'
# Conflicts:
# examples/booleansearchparser.py
# pyparsing/__init__.py
# tests/test_examples.py
# tests/test_unit.py
-rw-r--r-- | CHANGES | 6 | ||||
-rw-r--r-- | examples/booleansearchparser.py | 151 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | tests/test_unit.py | 87 |
4 files changed, 186 insertions, 60 deletions
@@ -23,8 +23,8 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit "{" + (Literal("A") | Literal("a") | "") + "}" Some related changes implemented as part of this work: - - Literal("") now internally generates an Empty() (and no longer raises an exception) - - Empty is now a subclass of Literal + - `Literal("")` now internally generates an `Empty()` (and no longer raises an exception) + - `Empty` is now a subclass of `Literal` Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly. @@ -70,7 +70,7 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit - Fixed bug in srange, when parsing escaped '/' and '\' inside a range set. -- Fixed exception messages for some ParserElements with custom names, +- Fixed exception messages for some `ParserElements` with custom names, which instead showed their contained expression names. - Multiple added and corrected type annotations. With much help from diff --git a/examples/booleansearchparser.py b/examples/booleansearchparser.py index f612379..cefba01 100644 --- a/examples/booleansearchparser.py +++ b/examples/booleansearchparser.py @@ -90,11 +90,36 @@ from pyparsing import ( Suppress, OneOrMore, one_of, - pyparsing_unicode as ppu, ) import re +# Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt +# (includes characters not found in the BasicMultilingualPlane) +alphabet_ranges = [ + # CYRILIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block) + [int("0400", 16), int("04FF", 16)], + # ARABIC: https://en.wikipedia.org/wiki/Arabic_(Unicode_block) (Arabic (0600–06FF)+ Syriac (0700–074F)+ Arabic Supplement (0750–077F)) + [int("0600", 16), int("07FF", 16)], + # THAI: https://en.wikipedia.org/wiki/Thai_(Unicode_block) + [int("0E00", 16), int("0E7F", 16)], + # JAPANESE : https://en.wikipedia.org/wiki/Japanese_writing_system (Hiragana (3040–309F) + Katakana (30A0–30FF)) + [int("3040", 16), int("30FF", 16)], + # Enclosed CJK Letters and Months + [int("3200", 16), int("32FF", 16)], + # CHINESE: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + [int("4E00", 16), int("9FFF", 16)], + # KOREAN : https://en.wikipedia.org/wiki/Hangul + [int("1100", 16), int("11FF", 16)], + [int("3130", 16), int("318F", 16)], + [int("A960", 16), int("A97F", 16)], + [int("AC00", 16), int("D7AF", 16)], + [int("D7B0", 16), int("D7FF", 16)], + # Halfwidth and Fullwidth Forms + [int("FF00", 16), int("FFEF", 16)], +] + + class BooleanSearchParser: def __init__(self, only_parse=False): self._methods = { @@ -128,8 +153,11 @@ class BooleanSearchParser: """ operatorOr = Forward() + alphabet = alphanums + # support for non-western alphabets - alphabet = ppu.BasicMultilingualPlane.alphanums + for lo, hi in alphabet_ranges: + alphabet += "".join(chr(c) for c in range(lo, hi + 1) if not chr(c).isspace()) operatorWord = Group(Word(alphabet + "*")).set_results_name("word*") @@ -297,58 +325,66 @@ class ParserTest(BooleanSearchParser): def Test(self): # fmt: off exprs = { - 0: "help", - 1: "help or hulp", - 2: "help and hulp", - 3: "help hulp", - 4: "help and hulp or hilp", - 5: "help or hulp and hilp", - 6: "help or hulp or hilp or halp", - 7: "(help or hulp) and (hilp or halp)", - 8: "help and (hilp or halp)", - 9: "(help and (hilp or halp)) or hulp", - 10: "not help", - 11: "not hulp and halp", - 12: "not (help and halp)", - 13: '"help me please"', - 14: '"help me please" or hulp', - 15: '"help me please" or (hulp and halp)', - 16: "help*", - 17: "help or hulp*", - 18: "help* and hulp", - 19: "help and hulp* or hilp", - 20: "help* or hulp or hilp or halp", - 21: "(help or hulp*) and (hilp* or halp)", - 22: "help* and (hilp* or halp*)", - 23: "(help and (hilp* or halp)) or hulp*", - 24: "not help* and halp", - 25: "not (help* and helpe*)", - 26: '"help* me please"', - 27: '"help* me* please" or hulp*', - 28: '"help me please*" or (hulp and halp)', - 29: '"help me please" not (hulp and halp)', - 30: '"help me please" hulp', - 31: "help and hilp and not holp", - 32: "help hilp not holp", - 33: "help hilp and not holp", - 34: "*lp and halp", - 35: "*신은 and 어떠세요", - 36: "not 당신은", - 37: "당신 or 당", - 38: "亀", + "0": "help", + "1": "help or hulp", + "2": "help and hulp", + "3": "help hulp", + "4": "help and hulp or hilp", + "5": "help or hulp and hilp", + "6": "help or hulp or hilp or halp", + "7": "(help or hulp) and (hilp or halp)", + "8": "help and (hilp or halp)", + "9": "(help and (hilp or halp)) or hulp", + "10": "not help", + "11": "not hulp and halp", + "12": "not (help and halp)", + "13": '"help me please"', + "14": '"help me please" or hulp', + "15": '"help me please" or (hulp and halp)', + "16": "help*", + "17": "help or hulp*", + "18": "help* and hulp", + "19": "help and hulp* or hilp", + "20": "help* or hulp or hilp or halp", + "21": "(help or hulp*) and (hilp* or halp)", + "22": "help* and (hilp* or halp*)", + "23": "(help and (hilp* or halp)) or hulp*", + "24": "not help* and halp", + "25": "not (help* and helpe*)", + "26": '"help* me please"', + "27": '"help* me* please" or hulp*', + "28": '"help me please*" or (hulp and halp)', + "29": '"help me please" not (hulp and halp)', + "30": '"help me please" hulp', + "31": "help and hilp and not holp", + "32": "help hilp not holp", + "33": "help hilp and not holp", + "34": "*lp and halp", + "35": "*신은 and 어떠세요", } texts_matcheswith = { - "halp thinks he needs help": [0, 1, 5, 6, 7, 8, 9, 11, 16, 17, 20, 21, 22, 23, 25, 34, 36], - "he needs halp": [6, 10, 11, 12, 20, 24, 25, 34, 36], - "help": [0, 1, 5, 6, 12, 16, 17, 20, 25, 36], - "help hilp": [0, 1, 4, 5, 6, 7, 8, 9, 12, 16, 17, 19, 20, 21, 22, 23, 25, 31, 32, 33, 36], - "help me please hulp": [0, 1, 2, 3, 4, 5, 6, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 25, 27, 29, 30, 36], - "helper": [10, 12, 16, 20, 36], - "hulp hilp": [1, 4, 5, 6, 7, 9, 10, 12, 14, 17, 19, 20, 21, 23, 25, 27, 36], - "nothing": [10, 12, 25, 36], - "안녕하세요, 당신은 어떠세요?": [10, 12, 25, 35], - "亀": [10, 12, 25, 36, 38], + "halp thinks he needs help": [ + "25", "22", "20", "21", "11", "17", "16", "23", "34", "1", + "0", "5", "7", "6", "9", "8", + ], + "he needs halp": ["24", "25", "20", "11", "10", "12", "34", "6"], + "help": ["25", "20", "12", "17", "16", "1", "0", "5", "6"], + "help hilp": [ + "25", "22", "20", "32", "21", "12", "17", "16", "19", "31", + "23", "1", "0", "5", "4", "7", "6", "9", "8", "33", + ], + "help me please hulp": [ + "30", "25", "27", "20", "13", "12", "15", "14", "17", "16", + "19", "18", "23", "29", "1", "0", "3", "2", "5", "4", "6", "9", + ], + "helper": ["20", "10", "12", "16"], + "hulp hilp": [ + "25", "27", "20", "21", "10", "12", "14", "17", "19", "23", + "1", "5", "4", "7", "6", "9", + ], + "nothing": ["25", "10", "12"], + "안녕하세요, 당신은 어떠세요?": ["10", "12", "25", "35"], } # fmt: on @@ -360,7 +396,9 @@ class ParserTest(BooleanSearchParser): _matches.append(_id) test_passed = sorted(matches) == sorted(_matches) - if not test_passed: + if test_passed: + print("Passed", repr(text)) + else: print("Failed", repr(text), "expected", matches, "matched", _matches) all_ok = all_ok and test_passed @@ -391,7 +429,9 @@ class ParserTest(BooleanSearchParser): _matches.append(_id) test_passed = sorted(matches) == sorted(_matches) - if not test_passed: + if test_passed: + print("Passed", repr(text)) + else: print("Failed", repr(text), "expected", matches, "matched", _matches) all_ok = all_ok and test_passed @@ -402,11 +442,10 @@ class ParserTest(BooleanSearchParser): def main(): if ParserTest().Test(): print("All tests OK") - exit(0) else: print("One or more tests FAILED") - exit(1) + raise Exception("One or more tests FAILED") if __name__ == "__main__": - main()
\ No newline at end of file + main() diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index f47f7ff..dd66063 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ class version_info(NamedTuple): __version_info__ = version_info(3, 0, 10, "final", 0) -__version_time__ = "09 Jul 2022 13:52 UTC" +__version_time__ = "09 Jul 2022 20:01 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/tests/test_unit.py b/tests/test_unit.py index 0687f1c..d955c48 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -8313,6 +8313,93 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): expr, source, [s.strip() for s in source.split(",")] ) + def testDelimitedListParseActions1(self): + # from issue #408 + keyword = pp.Keyword('foobar') + untyped_identifier = ~keyword + pp.Word(pp.alphas) + dotted_vars = pp.delimited_list(untyped_identifier, delim='.') + lvalue = pp.Opt(dotted_vars) + + # uncomment this line to see the problem + stmt = pp.delimited_list(pp.Opt(dotted_vars)) + # stmt = delimited_list(dotted_vars) + # stmt = pp.Opt(dotted_vars) + + def parse_identifier(toks): + print('YAY!', toks) + + untyped_identifier.set_parse_action(parse_identifier) + + save_stdout = StringIO() + with contextlib.redirect_stdout(save_stdout): + dotted_vars.parse_string('B.C') + + self.assertEqual( + dedent("""\ + YAY! ['B'] + YAY! ['C'] + """), + save_stdout.getvalue() + ) + + def testDelimitedListParseActions2(self): + # from issue #408 + keyword = pp.Keyword('foobar') + untyped_identifier = ~keyword + pp.Word(pp.alphas) + dotted_vars = pp.delimited_list(untyped_identifier, delim='.') + lvalue = pp.Opt(dotted_vars) + + # uncomment this line to see the problem + # stmt = delimited_list(Opt(dotted_vars)) + stmt = pp.delimited_list(dotted_vars) + # stmt = pp.Opt(dotted_vars) + + def parse_identifier(toks): + print('YAY!', toks) + + untyped_identifier.set_parse_action(parse_identifier) + + save_stdout = StringIO() + with contextlib.redirect_stdout(save_stdout): + dotted_vars.parse_string('B.C') + + self.assertEqual( + dedent("""\ + YAY! ['B'] + YAY! ['C'] + """), + save_stdout.getvalue() + ) + + def testDelimitedListParseActions3(self): + # from issue #408 + keyword = pp.Keyword('foobar') + untyped_identifier = ~keyword + pp.Word(pp.alphas) + dotted_vars = pp.delimited_list(untyped_identifier, delim='.') + lvalue = pp.Opt(dotted_vars) + + # uncomment this line to see the problem + # stmt = delimited_list(Opt(dotted_vars)) + # stmt = delimited_list(dotted_vars) + stmt = pp.Opt(dotted_vars) + + def parse_identifier(toks): + print('YAY!', toks) + + untyped_identifier.set_parse_action(parse_identifier) + + save_stdout = StringIO() + with contextlib.redirect_stdout(save_stdout): + dotted_vars.parse_string('B.C') + + self.assertEqual( + dedent("""\ + YAY! ['B'] + YAY! ['C'] + """), + save_stdout.getvalue() + ) + def testEnableDebugOnNamedExpressions(self): """ - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent |