Merge branch 'ptm_address_booleansearchparser_issue'

# Conflicts:
#   examples/booleansearchparser.py
#   pyparsing/__init__.py
#   tests/test_examples.py
#   tests/test_unit.py
author: ptmcg <ptmcg@austin.rr.com> 2022-07-09 15:37:04 -0500
committer: ptmcg <ptmcg@austin.rr.com> 2022-07-09 15:37:04 -0500
commit: 97b30229dcdebddc341df114d9d438431179f4bb (patch)
tree: 4a991abe022fa078d4d7a2aff99209519bccaee5
parent: fe195a849c62e58a8de3274830c3ebc98f892750 (diff)
parent: 9751d0c686583fa4ade133fa845297f0f342c718 (diff)
download: pyparsing-git-97b30229dcdebddc341df114d9d438431179f4bb.tar.gz
4 files changed, 186 insertions, 60 deletions
diff --git a/CHANGES b/CHANGES
index 6e651db..9a4baa8 100644
--- a/CHANGES
+++ b/CHANGES
@@ -23,8 +23,8 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
       "{" + (Literal("A") | Literal("a") | "") + "}"
 
   Some related changes implemented as part of this work:
-  - Literal("") now internally generates an Empty() (and no longer raises an exception)
-  - Empty is now a subclass of Literal
+  - `Literal("")` now internally generates an `Empty()` (and no longer raises an exception)
+  - `Empty` is now a subclass of `Literal`
 
   Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly.
 
@@ -70,7 +70,7 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
 - Fixed bug in srange, when parsing escaped '/' and '\' inside a
   range set.
 
-- Fixed exception messages for some ParserElements with custom names,
+- Fixed exception messages for some `ParserElements` with custom names,
   which instead showed their contained expression names.
 
 - Multiple added and corrected type annotations. With much help from
diff --git a/examples/booleansearchparser.py b/examples/booleansearchparser.py
index f612379..cefba01 100644
--- a/examples/booleansearchparser.py
+++ b/examples/booleansearchparser.py
@@ -90,11 +90,36 @@ from pyparsing import (
     Suppress,
     OneOrMore,
     one_of,
-    pyparsing_unicode as ppu,
 )
 import re
 
 
+# Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt
+# (includes characters not found in the BasicMultilingualPlane)
+alphabet_ranges = [
+    # CYRILLIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block)
+    [int("0400", 16), int("04FF", 16)],
+    # ARABIC: https://en.wikipedia.org/wiki/Arabic_(Unicode_block) (Arabic (0600–06FF) + Syriac (0700–074F) + Arabic Supplement (0750–077F) + Thaana (0780–07BF) + NKo (07C0–07FF))
+    [int("0600", 16), int("07FF", 16)],
+    # THAI: https://en.wikipedia.org/wiki/Thai_(Unicode_block)
+    [int("0E00", 16), int("0E7F", 16)],
+    # JAPANESE : https://en.wikipedia.org/wiki/Japanese_writing_system (Hiragana (3040–309F) + Katakana (30A0–30FF))
+    [int("3040", 16), int("30FF", 16)],
+    # Enclosed CJK Letters and Months
+    [int("3200", 16), int("32FF", 16)],
+    # CHINESE: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+    [int("4E00", 16), int("9FFF", 16)],
+    # KOREAN : https://en.wikipedia.org/wiki/Hangul
+    [int("1100", 16), int("11FF", 16)],
+    [int("3130", 16), int("318F", 16)],
+    [int("A960", 16), int("A97F", 16)],
+    [int("AC00", 16), int("D7AF", 16)],
+    [int("D7B0", 16), int("D7FF", 16)],
+    # Halfwidth and Fullwidth Forms
+    [int("FF00", 16), int("FFEF", 16)],
+]
+
+
 class BooleanSearchParser:
     def __init__(self, only_parse=False):
         self._methods = {
@@ -128,8 +153,11 @@ class BooleanSearchParser:
         """
         operatorOr = Forward()
 
+        alphabet = alphanums
+
         # support for non-western alphabets
-        alphabet = ppu.BasicMultilingualPlane.alphanums
+        for lo, hi in alphabet_ranges:
+            alphabet += "".join(chr(c) for c in range(lo, hi + 1) if not chr(c).isspace())
 
         operatorWord = Group(Word(alphabet + "*")).set_results_name("word*")
 
@@ -297,58 +325,66 @@ class ParserTest(BooleanSearchParser):
     def Test(self):
         # fmt: off
         exprs = {
-            0: "help",
-            1: "help or hulp",
-            2: "help and hulp",
-            3: "help hulp",
-            4: "help and hulp or hilp",
-            5: "help or hulp and hilp",
-            6: "help or hulp or hilp or halp",
-            7: "(help or hulp) and (hilp or halp)",
-            8: "help and (hilp or halp)",
-            9: "(help and (hilp or halp)) or hulp",
-            10: "not help",
-            11: "not hulp and halp",
-            12: "not (help and halp)",
-            13: '"help me please"',
-            14: '"help me please" or hulp',
-            15: '"help me please" or (hulp and halp)',
-            16: "help*",
-            17: "help or hulp*",
-            18: "help* and hulp",
-            19: "help and hulp* or hilp",
-            20: "help* or hulp or hilp or halp",
-            21: "(help or hulp*) and (hilp* or halp)",
-            22: "help* and (hilp* or halp*)",
-            23: "(help and (hilp* or halp)) or hulp*",
-            24: "not help* and halp",
-            25: "not (help* and helpe*)",
-            26: '"help* me please"',
-            27: '"help* me* please" or hulp*',
-            28: '"help me please*" or (hulp and halp)',
-            29: '"help me please" not (hulp and halp)',
-            30: '"help me please" hulp',
-            31: "help and hilp and not holp",
-            32: "help hilp not holp",
-            33: "help hilp and not holp",
-            34: "*lp and halp",
-            35: "*신은 and 어떠세요",
-            36: "not 당신은",
-            37: "당신 or 당",
-            38: "亀",
+            "0": "help",
+            "1": "help or hulp",
+            "2": "help and hulp",
+            "3": "help hulp",
+            "4": "help and hulp or hilp",
+            "5": "help or hulp and hilp",
+            "6": "help or hulp or hilp or halp",
+            "7": "(help or hulp) and (hilp or halp)",
+            "8": "help and (hilp or halp)",
+            "9": "(help and (hilp or halp)) or hulp",
+            "10": "not help",
+            "11": "not hulp and halp",
+            "12": "not (help and halp)",
+            "13": '"help me please"',
+            "14": '"help me please" or hulp',
+            "15": '"help me please" or (hulp and halp)',
+            "16": "help*",
+            "17": "help or hulp*",
+            "18": "help* and hulp",
+            "19": "help and hulp* or hilp",
+            "20": "help* or hulp or hilp or halp",
+            "21": "(help or hulp*) and (hilp* or halp)",
+            "22": "help* and (hilp* or halp*)",
+            "23": "(help and (hilp* or halp)) or hulp*",
+            "24": "not help* and halp",
+            "25": "not (help* and helpe*)",
+            "26": '"help* me please"',
+            "27": '"help* me* please" or hulp*',
+            "28": '"help me please*" or (hulp and halp)',
+            "29": '"help me please" not (hulp and halp)',
+            "30": '"help me please" hulp',
+            "31": "help and hilp and not holp",
+            "32": "help hilp not holp",
+            "33": "help hilp and not holp",
+            "34": "*lp and halp",
+            "35": "*신은 and 어떠세요",
         }
 
         texts_matcheswith = {
-            "halp thinks he needs help": [0, 1, 5, 6, 7, 8, 9, 11, 16, 17, 20, 21, 22, 23, 25, 34, 36],
-            "he needs halp": [6, 10, 11, 12, 20, 24, 25, 34, 36],
-            "help": [0, 1, 5, 6, 12, 16, 17, 20, 25, 36],
-            "help hilp": [0, 1, 4, 5, 6, 7, 8, 9, 12, 16, 17, 19, 20, 21, 22, 23, 25, 31, 32, 33, 36],
-            "help me please hulp": [0, 1, 2, 3, 4, 5, 6, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 25, 27, 29, 30, 36],
-            "helper": [10, 12, 16, 20, 36],
-            "hulp hilp": [1, 4, 5, 6, 7, 9, 10, 12, 14, 17, 19, 20, 21, 23, 25, 27, 36],
-            "nothing": [10, 12, 25, 36],
-            "안녕하세요, 당신은 어떠세요?": [10, 12, 25, 35],
-            "亀": [10, 12, 25, 36, 38],
+            "halp thinks he needs help": [
+                "25", "22", "20", "21", "11", "17", "16", "23", "34", "1",
+                "0", "5", "7", "6", "9", "8",
+            ],
+            "he needs halp": ["24", "25", "20", "11", "10", "12", "34", "6"],
+            "help": ["25", "20", "12", "17", "16", "1", "0", "5", "6"],
+            "help hilp": [
+                "25", "22", "20", "32", "21", "12", "17", "16", "19", "31",
+                "23", "1", "0", "5", "4", "7", "6", "9", "8", "33",
+            ],
+            "help me please hulp": [
+                "30", "25", "27", "20", "13", "12", "15", "14", "17", "16",
+                "19", "18", "23", "29", "1", "0", "3", "2", "5", "4", "6", "9",
+            ],
+            "helper": ["20", "10", "12", "16"],
+            "hulp hilp": [
+                "25", "27", "20", "21", "10", "12", "14", "17", "19", "23",
+                "1", "5", "4", "7", "6", "9",
+            ],
+            "nothing": ["25", "10", "12"],
+            "안녕하세요, 당신은 어떠세요?": ["10", "12", "25", "35"],
         }
         # fmt: on
 
@@ -360,7 +396,9 @@ class ParserTest(BooleanSearchParser):
                     _matches.append(_id)
 
             test_passed = sorted(matches) == sorted(_matches)
-            if not test_passed:
+            if test_passed:
+                print("Passed", repr(text))
+            else:
                 print("Failed", repr(text), "expected", matches, "matched", _matches)
 
             all_ok = all_ok and test_passed
@@ -391,7 +429,9 @@ class ParserTest(BooleanSearchParser):
                     _matches.append(_id)
 
             test_passed = sorted(matches) == sorted(_matches)
-            if not test_passed:
+            if test_passed:
+                print("Passed", repr(text))
+            else:
                 print("Failed", repr(text), "expected", matches, "matched", _matches)
 
             all_ok = all_ok and test_passed
@@ -402,11 +442,10 @@ class ParserTest(BooleanSearchParser):
 def main():
     if ParserTest().Test():
         print("All tests OK")
-        exit(0)
     else:
         print("One or more tests FAILED")
-        exit(1)
+        raise Exception("One or more tests FAILED")
 
 
 if __name__ == "__main__":
-    main()
-\ No newline at end of file
+    main()
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index f47f7ff..dd66063 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -121,7 +121,7 @@ class version_info(NamedTuple):
 
 
 __version_info__ = version_info(3, 0, 10, "final", 0)
-__version_time__ = "09 Jul 2022 13:52 UTC"
+__version_time__ = "09 Jul 2022 20:01 UTC"
 __version__ = __version_info__.__version__
 __versionTime__ = __version_time__
 __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 0687f1c..d955c48 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -8313,6 +8313,93 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
                 expr, source, [s.strip() for s in source.split(",")]
             )
 
+    def testDelimitedListParseActions1(self):
+        # from issue #408
+        keyword = pp.Keyword('foobar')
+        untyped_identifier = ~keyword + pp.Word(pp.alphas)
+        dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+        lvalue = pp.Opt(dotted_vars)
+
+        # this is the stmt definition that showed the problem in issue #408
+        stmt = pp.delimited_list(pp.Opt(dotted_vars))
+        # stmt = delimited_list(dotted_vars)
+        # stmt = pp.Opt(dotted_vars)
+
+        def parse_identifier(toks):
+            print('YAY!', toks)
+
+        untyped_identifier.set_parse_action(parse_identifier)
+
+        save_stdout = StringIO()
+        with contextlib.redirect_stdout(save_stdout):
+            dotted_vars.parse_string('B.C')
+
+        self.assertEqual(
+            dedent("""\
+                YAY! ['B']
+                YAY! ['C']
+                """),
+            save_stdout.getvalue()
+        )
+
+    def testDelimitedListParseActions2(self):
+        # from issue #408
+        keyword = pp.Keyword('foobar')
+        untyped_identifier = ~keyword + pp.Word(pp.alphas)
+        dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+        lvalue = pp.Opt(dotted_vars)
+
+        # uncomment this line to see the problem
+        # stmt = delimited_list(Opt(dotted_vars))
+        stmt = pp.delimited_list(dotted_vars)
+        # stmt = pp.Opt(dotted_vars)
+
+        def parse_identifier(toks):
+            print('YAY!', toks)
+
+        untyped_identifier.set_parse_action(parse_identifier)
+
+        save_stdout = StringIO()
+        with contextlib.redirect_stdout(save_stdout):
+            dotted_vars.parse_string('B.C')
+
+        self.assertEqual(
+            dedent("""\
+                YAY! ['B']
+                YAY! ['C']
+                """),
+            save_stdout.getvalue()
+        )
+
+    def testDelimitedListParseActions3(self):
+        # from issue #408
+        keyword = pp.Keyword('foobar')
+        untyped_identifier = ~keyword + pp.Word(pp.alphas)
+        dotted_vars = pp.delimited_list(untyped_identifier, delim='.')
+        lvalue = pp.Opt(dotted_vars)
+
+        # uncomment this line to see the problem
+        # stmt = delimited_list(Opt(dotted_vars))
+        # stmt = delimited_list(dotted_vars)
+        stmt = pp.Opt(dotted_vars)
+
+        def parse_identifier(toks):
+            print('YAY!', toks)
+
+        untyped_identifier.set_parse_action(parse_identifier)
+
+        save_stdout = StringIO()
+        with contextlib.redirect_stdout(save_stdout):
+            dotted_vars.parse_string('B.C')
+
+        self.assertEqual(
+            dedent("""\
+                YAY! ['B']
+                YAY! ['C']
+                """),
+            save_stdout.getvalue()
+        )
+
     def testEnableDebugOnNamedExpressions(self):
         """
         - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
author	ptmcg <ptmcg@austin.rr.com>	2022-07-09 15:37:04 -0500
committer	ptmcg <ptmcg@austin.rr.com>	2022-07-09 15:37:04 -0500
commit	97b30229dcdebddc341df114d9d438431179f4bb (patch)
tree	4a991abe022fa078d4d7a2aff99209519bccaee5
parent	fe195a849c62e58a8de3274830c3ebc98f892750 (diff)
parent	9751d0c686583fa4ade133fa845297f0f342c718 (diff)
download	pyparsing-git-97b30229dcdebddc341df114d9d438431179f4bb.tar.gz